41 files changed, 3830 insertions, 2088 deletions
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 9ddf5ed62162..898a86dde8f5 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -470,7 +470,7 @@ int coda_readdir(struct file *coda_file, void *dirent, filldir_t filldir)
 
 		ret = -ENOENT;
 		if (!IS_DEADDIR(host_inode)) {
-			ret = host_file->f_op->readdir(host_file, filldir, dirent);
+			ret = host_file->f_op->readdir(host_file, dirent, filldir);
 			file_accessed(host_file);
 		}
 	}
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index ec8896b264de..1d533a2ec3a6 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -368,6 +368,69 @@ void debugfs_remove(struct dentry *dentry)
 }
 EXPORT_SYMBOL_GPL(debugfs_remove);
 
+/**
+ * debugfs_rename - rename a file/directory in the debugfs filesystem
+ * @old_dir: a pointer to the parent dentry for the renamed object. This
+ *          should be a directory dentry.
+ * @old_dentry: dentry of an object to be renamed.
+ * @new_dir: a pointer to the parent dentry where the object should be
+ *          moved. This should be a directory dentry.
+ * @new_name: a pointer to a string containing the target name.
+ *
+ * This function renames a file/directory in debugfs.  The target must not
+ * exist for rename to succeed.
+ *
+ * This function will return a pointer to old_dentry (which is updated to
+ * reflect renaming) if it succeeds. If an error occurs, %NULL will be
+ * returned.
+ *
+ * If debugfs is not enabled in the kernel, the value -%ENODEV will be
+ * returned.
+ */
+struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
+		struct dentry *new_dir, const char *new_name)
+{
+	int error;
+	struct dentry *dentry = NULL, *trap;
+	const char *old_name;
+
+	trap = lock_rename(new_dir, old_dir);
+	/* Source or destination directories don't exist? */
+	if (!old_dir->d_inode || !new_dir->d_inode)
+		goto exit;
+	/* Source does not exist, cyclic rename, or mountpoint? */
+	if (!old_dentry->d_inode || old_dentry == trap ||
+	    d_mountpoint(old_dentry))
+		goto exit;
+	dentry = lookup_one_len(new_name, new_dir, strlen(new_name));
+	/* Lookup failed, cyclic rename or target exists? */
+	if (IS_ERR(dentry) || dentry == trap || dentry->d_inode)
+		goto exit;
+
+	old_name = fsnotify_oldname_init(old_dentry->d_name.name);
+
+	error = simple_rename(old_dir->d_inode, old_dentry, new_dir->d_inode,
+		dentry);
+	if (error) {
+		fsnotify_oldname_free(old_name);
+		goto exit;
+	}
+	d_move(old_dentry, dentry);
+	fsnotify_move(old_dir->d_inode, new_dir->d_inode, old_name,
+		old_dentry->d_name.name, S_ISDIR(old_dentry->d_inode->i_mode),
+		NULL, old_dentry->d_inode);
+	fsnotify_oldname_free(old_name);
+	unlock_rename(new_dir, old_dir);
+	dput(dentry);
+	return old_dentry;
+exit:
+	if (dentry && !IS_ERR(dentry))
+		dput(dentry);
+	unlock_rename(new_dir, old_dir);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(debugfs_rename);
+
 static decl_subsys(debug, NULL, NULL);
 
 static int __init debugfs_init(void)
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 606128f5c927..02ca6f1e55d7 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -840,8 +840,6 @@ static int __init ecryptfs_init(void)
 		goto out;
 	}
 	kobj_set_kset_s(&ecryptfs_subsys, fs_subsys);
-	sysfs_attr_version.attr.owner = THIS_MODULE;
-	sysfs_attr_version_str.attr.owner = THIS_MODULE;
 	rc = do_sysfs_registration();
 	if (rc) {
 		printk(KERN_ERR "sysfs registration failed\n");
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 96070bff93fc..572601e98dcd 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -44,9 +44,8 @@ static struct nsm_handle *	nsm_find(const struct sockaddr_in *sin,
  */
 static struct nlm_host *
 nlm_lookup_host(int server, const struct sockaddr_in *sin,
-					int proto, int version,
-					const char *hostname,
-					int hostname_len)
+		int proto, int version, const char *hostname,
+		int hostname_len, const struct sockaddr_in *ssin)
 {
 	struct hlist_head *chain;
 	struct hlist_node *pos;
@@ -54,7 +53,9 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin,
 	struct nsm_handle *nsm = NULL;
 	int		hash;
 
-	dprintk("lockd: nlm_lookup_host(%u.%u.%u.%u, p=%d, v=%d, my role=%s, name=%.*s)\n",
+	dprintk("lockd: nlm_lookup_host("NIPQUAD_FMT"->"NIPQUAD_FMT
+			", p=%d, v=%d, my role=%s, name=%.*s)\n",
+			NIPQUAD(ssin->sin_addr.s_addr),
 			NIPQUAD(sin->sin_addr.s_addr), proto, version,
 			server? "server" : "client",
 			hostname_len,
@@ -91,6 +92,8 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin,
 			continue;
 		if (host->h_server != server)
 			continue;
+		if (!nlm_cmp_addr(&host->h_saddr, ssin))
+			continue;
 
 		/* Move to head of hash chain. */
 		hlist_del(&host->h_hash);
@@ -118,6 +121,7 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin,
 	host->h_name	   = nsm->sm_name;
 	host->h_addr       = *sin;
 	host->h_addr.sin_port = 0;	/* ouch! */
+	host->h_saddr	   = *ssin;
 	host->h_version    = version;
 	host->h_proto      = proto;
 	host->h_rpcclnt    = NULL;
@@ -161,15 +165,9 @@ nlm_destroy_host(struct nlm_host *host)
 	 */
 	nsm_unmonitor(host);
 
-	if ((clnt = host->h_rpcclnt) != NULL) {
-		if (atomic_read(&clnt->cl_users)) {
-			printk(KERN_WARNING
-				"lockd: active RPC handle\n");
-			clnt->cl_dead = 1;
-		} else {
-			rpc_destroy_client(host->h_rpcclnt);
-		}
-	}
+	clnt = host->h_rpcclnt;
+	if (clnt != NULL)
+		rpc_shutdown_client(clnt);
 	kfree(host);
 }
 
@@ -180,8 +178,10 @@ struct nlm_host *
 nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version,
 			const char *hostname, int hostname_len)
 {
+	struct sockaddr_in ssin = {0};
+
 	return nlm_lookup_host(0, sin, proto, version,
-			       hostname, hostname_len);
+			       hostname, hostname_len, &ssin);
 }
 
 /*
@@ -191,9 +191,12 @@ struct nlm_host *
 nlmsvc_lookup_host(struct svc_rqst *rqstp,
 			const char *hostname, int hostname_len)
 {
+	struct sockaddr_in ssin = {0};
+
+	ssin.sin_addr = rqstp->rq_daddr.addr;
 	return nlm_lookup_host(1, svc_addr_in(rqstp),
 			       rqstp->rq_prot, rqstp->rq_vers,
-			       hostname, hostname_len);
+			       hostname, hostname_len, &ssin);
 }
 
 /*
@@ -204,8 +207,9 @@ nlm_bind_host(struct nlm_host *host)
 {
 	struct rpc_clnt	*clnt;
 
-	dprintk("lockd: nlm_bind_host(%08x)\n",
-			(unsigned)ntohl(host->h_addr.sin_addr.s_addr));
+	dprintk("lockd: nlm_bind_host("NIPQUAD_FMT"->"NIPQUAD_FMT")\n",
+			NIPQUAD(host->h_saddr.sin_addr),
+			NIPQUAD(host->h_addr.sin_addr));
 
 	/* Lock host handle */
 	mutex_lock(&host->h_mutex);
@@ -232,6 +236,7 @@ nlm_bind_host(struct nlm_host *host)
 			.protocol	= host->h_proto,
 			.address	= (struct sockaddr *)&host->h_addr,
 			.addrsize	= sizeof(host->h_addr),
+			.saddress	= (struct sockaddr *)&host->h_saddr,
 			.timeout	= &timeparms,
 			.servername	= host->h_name,
 			.program	= &nlm_program,
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 2102e2d0134d..3353ed8421a7 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -61,6 +61,7 @@ nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res)
 			status);
 	else
 		status = 0;
+	rpc_shutdown_client(clnt);
  out:
 	return status;
 }
@@ -138,7 +139,6 @@ nsm_create(void)
 		.program	= &nsm_program,
 		.version	= SM_VERSION,
 		.authflavor	= RPC_AUTH_NULL,
-		.flags		= (RPC_CLNT_CREATE_ONESHOT),
 	};
 
 	return rpc_create(&args);
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 126b1bf02c0e..26809325469c 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -123,9 +123,6 @@ lockd(struct svc_rqst *rqstp)
 	/* Process request with signals blocked, but allow SIGKILL.  */
 	allow_signal(SIGKILL);
 
-	/* kick rpciod */
-	rpciod_up();
-
 	dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n");
 
 	if (!nlm_timeout)
@@ -202,9 +199,6 @@ lockd(struct svc_rqst *rqstp)
 	/* Exit the RPC thread */
 	svc_exit_thread(rqstp);
 
-	/* release rpciod */
-	rpciod_down();
-
 	/* Release module */
 	unlock_kernel();
 	module_put_and_exit(0);
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index f4580b44eef4..b55cb236cf74 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -6,8 +6,8 @@ obj-$(CONFIG_NFS_FS) += nfs.o
 
 nfs-y 			:= client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \
 			   pagelist.o proc.o read.o symlink.o unlink.o \
-			   write.o namespace.o
-nfs-$(CONFIG_ROOT_NFS)	+= nfsroot.o mount_clnt.o      
+			   write.o namespace.o mount_clnt.o
+nfs-$(CONFIG_ROOT_NFS)	+= nfsroot.o
 nfs-$(CONFIG_NFS_V3)	+= nfs3proc.o nfs3xdr.o
 nfs-$(CONFIG_NFS_V3_ACL)	+= nfs3acl.o
 nfs-$(CONFIG_NFS_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 881fa4900923..ccb455053ee4 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -102,19 +102,10 @@ static struct nfs_client *nfs_alloc_client(const char *hostname,
 					   int nfsversion)
 {
 	struct nfs_client *clp;
-	int error;
 
 	if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL)
 		goto error_0;
 
-	error = rpciod_up();
-	if (error < 0) {
-		dprintk("%s: couldn't start rpciod! Error = %d\n",
-				__FUNCTION__, error);
-		goto error_1;
-	}
-	__set_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
-
 	if (nfsversion == 4) {
 		if (nfs_callback_up() < 0)
 			goto error_2;
@@ -139,8 +130,6 @@ static struct nfs_client *nfs_alloc_client(const char *hostname,
 #ifdef CONFIG_NFS_V4
 	init_rwsem(&clp->cl_sem);
 	INIT_LIST_HEAD(&clp->cl_delegations);
-	INIT_LIST_HEAD(&clp->cl_state_owners);
-	INIT_LIST_HEAD(&clp->cl_unused);
 	spin_lock_init(&clp->cl_lock);
 	INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
 	rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
@@ -154,9 +143,6 @@ error_3:
 	if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
 		nfs_callback_down();
 error_2:
-	rpciod_down();
-	__clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
-error_1:
 	kfree(clp);
 error_0:
 	return NULL;
@@ -167,16 +153,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
 #ifdef CONFIG_NFS_V4
 	if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state))
 		nfs4_kill_renewd(clp);
-	while (!list_empty(&clp->cl_unused)) {
-		struct nfs4_state_owner *sp;
-
-		sp = list_entry(clp->cl_unused.next,
-				struct nfs4_state_owner,
-				so_list);
-		list_del(&sp->so_list);
-		kfree(sp);
-	}
-	BUG_ON(!list_empty(&clp->cl_state_owners));
+	BUG_ON(!RB_EMPTY_ROOT(&clp->cl_state_owners));
 	if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
 		nfs_idmap_delete(clp);
 #endif
@@ -198,9 +175,6 @@ static void nfs_free_client(struct nfs_client *clp)
 	if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
 		nfs_callback_down();
 
-	if (__test_and_clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state))
-		rpciod_down();
-
 	kfree(clp->cl_hostname);
 	kfree(clp);
 
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 7f37d1bea83f..20ac403469a0 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -27,6 +27,13 @@ static void nfs_free_delegation(struct nfs_delegation *delegation)
 	kfree(delegation);
 }
 
+static void nfs_free_delegation_callback(struct rcu_head *head)
+{
+	struct nfs_delegation *delegation = container_of(head, struct nfs_delegation, rcu);
+
+	nfs_free_delegation(delegation);
+}
+
 static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state)
 {
 	struct inode *inode = state->inode;
@@ -57,7 +64,7 @@ out_err:
 	return status;
 }
 
-static void nfs_delegation_claim_opens(struct inode *inode)
+static void nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *stateid)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_open_context *ctx;
@@ -72,9 +79,11 @@ again:
 			continue;
 		if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
 			continue;
+		if (memcmp(state->stateid.data, stateid->data, sizeof(state->stateid.data)) != 0)
+			continue;
 		get_nfs_open_context(ctx);
 		spin_unlock(&inode->i_lock);
-		err = nfs4_open_delegation_recall(ctx->dentry, state);
+		err = nfs4_open_delegation_recall(ctx, state, stateid);
 		if (err >= 0)
 			err = nfs_delegation_claim_locks(ctx, state);
 		put_nfs_open_context(ctx);
@@ -115,10 +124,6 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 	struct nfs_delegation *delegation;
 	int status = 0;
 
-	/* Ensure we first revalidate the attributes and page cache! */
-	if ((nfsi->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATTR)))
-		__nfs_revalidate_inode(NFS_SERVER(inode), inode);
-
 	delegation = kmalloc(sizeof(*delegation), GFP_KERNEL);
 	if (delegation == NULL)
 		return -ENOMEM;
@@ -131,10 +136,10 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 	delegation->inode = inode;
 
 	spin_lock(&clp->cl_lock);
-	if (nfsi->delegation == NULL) {
-		list_add(&delegation->super_list, &clp->cl_delegations);
-		nfsi->delegation = delegation;
+	if (rcu_dereference(nfsi->delegation) == NULL) {
+		list_add_rcu(&delegation->super_list, &clp->cl_delegations);
 		nfsi->delegation_state = delegation->type;
+		rcu_assign_pointer(nfsi->delegation, delegation);
 		delegation = NULL;
 	} else {
 		if (memcmp(&delegation->stateid, &nfsi->delegation->stateid,
@@ -145,6 +150,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 			status = -EIO;
 		}
 	}
+
+	/* Ensure we revalidate the attributes and page cache! */
+	spin_lock(&inode->i_lock);
+	nfsi->cache_validity |= NFS_INO_REVAL_FORCED;
+	spin_unlock(&inode->i_lock);
+
 	spin_unlock(&clp->cl_lock);
 	kfree(delegation);
 	return status;
@@ -155,7 +166,7 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *
 	int res = 0;
 
 	res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid);
-	nfs_free_delegation(delegation);
+	call_rcu(&delegation->rcu, nfs_free_delegation_callback);
 	return res;
 }
 
@@ -170,33 +181,55 @@ static void nfs_msync_inode(struct inode *inode)
 /*
  * Basic procedure for returning a delegation to the server
  */
-int __nfs_inode_return_delegation(struct inode *inode)
+static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegation *delegation)
 {
 	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
-	struct nfs_delegation *delegation;
-	int res = 0;
 
 	nfs_msync_inode(inode);
 	down_read(&clp->cl_sem);
 	/* Guard against new delegated open calls */
 	down_write(&nfsi->rwsem);
-	spin_lock(&clp->cl_lock);
-	delegation = nfsi->delegation;
-	if (delegation != NULL) {
-		list_del_init(&delegation->super_list);
-		nfsi->delegation = NULL;
-		nfsi->delegation_state = 0;
-	}
-	spin_unlock(&clp->cl_lock);
-	nfs_delegation_claim_opens(inode);
+	nfs_delegation_claim_opens(inode, &delegation->stateid);
 	up_write(&nfsi->rwsem);
 	up_read(&clp->cl_sem);
 	nfs_msync_inode(inode);
 
-	if (delegation != NULL)
-		res = nfs_do_return_delegation(inode, delegation);
-	return res;
+	return nfs_do_return_delegation(inode, delegation);
+}
+
+static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid)
+{
+	struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation);
+
+	if (delegation == NULL)
+		goto nomatch;
+	if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data,
+				sizeof(delegation->stateid.data)) != 0)
+		goto nomatch;
+	list_del_rcu(&delegation->super_list);
+	nfsi->delegation_state = 0;
+	rcu_assign_pointer(nfsi->delegation, NULL);
+	return delegation;
+nomatch:
+	return NULL;
+}
+
+int nfs_inode_return_delegation(struct inode *inode)
+{
+	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_delegation *delegation;
+	int err = 0;
+
+	if (rcu_dereference(nfsi->delegation) != NULL) {
+		spin_lock(&clp->cl_lock);
+		delegation = nfs_detach_delegation_locked(nfsi, NULL);
+		spin_unlock(&clp->cl_lock);
+		if (delegation != NULL)
+			err = __nfs_inode_return_delegation(inode, delegation);
+	}
+	return err;
 }
 
 /*
@@ -211,19 +244,23 @@ void nfs_return_all_delegations(struct super_block *sb)
 	if (clp == NULL)
 		return;
 restart:
-	spin_lock(&clp->cl_lock);
-	list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
 		if (delegation->inode->i_sb != sb)
 			continue;
 		inode = igrab(delegation->inode);
 		if (inode == NULL)
 			continue;
+		spin_lock(&clp->cl_lock);
+		delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
 		spin_unlock(&clp->cl_lock);
-		nfs_inode_return_delegation(inode);
+		rcu_read_unlock();
+		if (delegation != NULL)
+			__nfs_inode_return_delegation(inode, delegation);
 		iput(inode);
 		goto restart;
 	}
-	spin_unlock(&clp->cl_lock);
+	rcu_read_unlock();
 }
 
 static int nfs_do_expire_all_delegations(void *ptr)
@@ -234,22 +271,26 @@ static int nfs_do_expire_all_delegations(void *ptr)
 
 	allow_signal(SIGKILL);
 restart:
-	spin_lock(&clp->cl_lock);
 	if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) != 0)
 		goto out;
 	if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0)
 		goto out;
-	list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
 		inode = igrab(delegation->inode);
 		if (inode == NULL)
 			continue;
+		spin_lock(&clp->cl_lock);
+		delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
 		spin_unlock(&clp->cl_lock);
-		nfs_inode_return_delegation(inode);
+		rcu_read_unlock();
+		if (delegation)
+			__nfs_inode_return_delegation(inode, delegation);
 		iput(inode);
 		goto restart;
 	}
+	rcu_read_unlock();
 out:
-	spin_unlock(&clp->cl_lock);
 	nfs_put_client(clp);
 	module_put_and_exit(0);
 }
@@ -280,17 +321,21 @@ void nfs_handle_cb_pathdown(struct nfs_client *clp)
 	if (clp == NULL)
 		return;
 restart:
-	spin_lock(&clp->cl_lock);
-	list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
 		inode = igrab(delegation->inode);
 		if (inode == NULL)
 			continue;
+		spin_lock(&clp->cl_lock);
+		delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
 		spin_unlock(&clp->cl_lock);
-		nfs_inode_return_delegation(inode);
+		rcu_read_unlock();
+		if (delegation != NULL)
+			__nfs_inode_return_delegation(inode, delegation);
 		iput(inode);
 		goto restart;
 	}
-	spin_unlock(&clp->cl_lock);
+	rcu_read_unlock();
 }
 
 struct recall_threadargs {
@@ -316,21 +361,14 @@ static int recall_thread(void *data)
 	down_read(&clp->cl_sem);
 	down_write(&nfsi->rwsem);
 	spin_lock(&clp->cl_lock);
-	delegation = nfsi->delegation;
-	if (delegation != NULL && memcmp(delegation->stateid.data,
-				args->stateid->data,
-				sizeof(delegation->stateid.data)) == 0) {
-		list_del_init(&delegation->super_list);
-		nfsi->delegation = NULL;
-		nfsi->delegation_state = 0;
+	delegation = nfs_detach_delegation_locked(nfsi, args->stateid);
+	if (delegation != NULL)
 		args->result = 0;
-	} else {
-		delegation = NULL;
+	else
 		args->result = -ENOENT;
-	}
 	spin_unlock(&clp->cl_lock);
 	complete(&args->started);
-	nfs_delegation_claim_opens(inode);
+	nfs_delegation_claim_opens(inode, args->stateid);
 	up_write(&nfsi->rwsem);
 	up_read(&clp->cl_sem);
 	nfs_msync_inode(inode);
@@ -371,14 +409,14 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs
 {
 	struct nfs_delegation *delegation;
 	struct inode *res = NULL;
-	spin_lock(&clp->cl_lock);
-	list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
 		if (nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) {
 			res = igrab(delegation->inode);
 			break;
 		}
 	}
-	spin_unlock(&clp->cl_lock);
+	rcu_read_unlock();
 	return res;
 }
 
@@ -388,10 +426,10 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs
 void nfs_delegation_mark_reclaim(struct nfs_client *clp)
 {
 	struct nfs_delegation *delegation;
-	spin_lock(&clp->cl_lock);
-	list_for_each_entry(delegation, &clp->cl_delegations, super_list)
+	rcu_read_lock();
+	list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list)
 		delegation->flags |= NFS_DELEGATION_NEED_RECLAIM;
-	spin_unlock(&clp->cl_lock);
+	rcu_read_unlock();
 }
 
 /*
@@ -399,39 +437,35 @@ void nfs_delegation_mark_reclaim(struct nfs_client *clp)
  */
 void nfs_delegation_reap_unclaimed(struct nfs_client *clp)
 {
-	struct nfs_delegation *delegation, *n;
-	LIST_HEAD(head);
-	spin_lock(&clp->cl_lock);
-	list_for_each_entry_safe(delegation, n, &clp->cl_delegations, super_list) {
+	struct nfs_delegation *delegation;
+restart:
+	rcu_read_lock();
+	list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
 		if ((delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0)
 			continue;
-		list_move(&delegation->super_list, &head);
-		NFS_I(delegation->inode)->delegation = NULL;
-		NFS_I(delegation->inode)->delegation_state = 0;
-	}
-	spin_unlock(&clp->cl_lock);
-	while(!list_empty(&head)) {
-		delegation = list_entry(head.next, struct nfs_delegation, super_list);
-		list_del(&delegation->super_list);
-		nfs_free_delegation(delegation);
+		spin_lock(&clp->cl_lock);
+		delegation = nfs_detach_delegation_locked(NFS_I(delegation->inode), NULL);
+		spin_unlock(&clp->cl_lock);
+		rcu_read_unlock();
+		if (delegation != NULL)
+			call_rcu(&delegation->rcu, nfs_free_delegation_callback);
+		goto restart;
 	}
+	rcu_read_unlock();
 }
 
 int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
 {
-	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
-	int res = 0;
+	int ret = 0;
 
-	if (nfsi->delegation_state == 0)
-		return 0;
-	spin_lock(&clp->cl_lock);
-	delegation = nfsi->delegation;
+	rcu_read_lock();
+	delegation = rcu_dereference(nfsi->delegation);
 	if (delegation != NULL) {
 		memcpy(dst->data, delegation->stateid.data, sizeof(dst->data));
-		res = 1;
+		ret = 1;
 	}
-	spin_unlock(&clp->cl_lock);
-	return res;
+	rcu_read_unlock();
+	return ret;
 }
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 2cfd4b24c7fe..5874ce7fdbae 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -22,11 +22,12 @@ struct nfs_delegation {
 	long flags;
 	loff_t maxsize;
 	__u64 change_attr;
+	struct rcu_head rcu;
 };
 
 int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
 void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
-int __nfs_inode_return_delegation(struct inode *inode);
+int nfs_inode_return_delegation(struct inode *inode);
 int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
 
 struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
@@ -39,27 +40,24 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
 
 /* NFSv4 delegation-related procedures */
 int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid);
-int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state);
+int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid);
 int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl);
 int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
 
 static inline int nfs_have_delegation(struct inode *inode, int flags)
 {
+	struct nfs_delegation *delegation;
+	int ret = 0;
+
 	flags &= FMODE_READ|FMODE_WRITE;
-	smp_rmb();
-	if ((NFS_I(inode)->delegation_state & flags) == flags)
-		return 1;
-	return 0;
+	rcu_read_lock();
+	delegation = rcu_dereference(NFS_I(inode)->delegation);
+	if (delegation != NULL && (delegation->type & flags) == flags)
+		ret = 1;
+	rcu_read_unlock();
+	return ret;
 }
 
-static inline int nfs_inode_return_delegation(struct inode *inode)
-{
-	int err = 0;
-
-	if (NFS_I(inode)->delegation != NULL)
-		err = __nfs_inode_return_delegation(inode);
-	return err;
-}
 #else
 static inline int nfs_have_delegation(struct inode *inode, int flags)
 {
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index c27258b5d3e1..322141f4ab48 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -897,14 +897,13 @@ int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
 	return (nd->intent.open.flags & O_EXCL) != 0;
 }
 
-static inline int nfs_reval_fsid(struct vfsmount *mnt, struct inode *dir,
-				 struct nfs_fh *fh, struct nfs_fattr *fattr)
+static inline int nfs_reval_fsid(struct inode *dir, const struct nfs_fattr *fattr)
 {
 	struct nfs_server *server = NFS_SERVER(dir);
 
 	if (!nfs_fsid_equal(&server->fsid, &fattr->fsid))
-		/* Revalidate fsid on root dir */
-		return __nfs_revalidate_inode(server, mnt->mnt_root->d_inode);
+		/* Revalidate fsid using the parent directory */
+		return __nfs_revalidate_inode(server, dir);
 	return 0;
 }
 
@@ -946,7 +945,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 		res = ERR_PTR(error);
 		goto out_unlock;
 	}
-	error = nfs_reval_fsid(nd->mnt, dir, &fhandle, &fattr);
+	error = nfs_reval_fsid(dir, &fattr);
 	if (error < 0) {
 		res = ERR_PTR(error);
 		goto out_unlock;
@@ -1244,7 +1243,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
 	attr.ia_mode = mode;
 	attr.ia_valid = ATTR_MODE;
 
-	if (nd && (nd->flags & LOOKUP_CREATE))
+	if ((nd->flags & LOOKUP_CREATE) != 0)
 		open_flags = nd->intent.open.flags;
 
 	lock_kernel();
@@ -1535,7 +1534,7 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
 
 	lock_kernel();
 
-	page = alloc_page(GFP_KERNEL);
+	page = alloc_page(GFP_HIGHUSER);
 	if (!page) {
 		unlock_kernel();
 		return -ENOMEM;
@@ -1744,8 +1743,8 @@ int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
 	struct nfs_inode *nfsi;
 	struct nfs_access_entry *cache;
 
-	spin_lock(&nfs_access_lru_lock);
 restart:
+	spin_lock(&nfs_access_lru_lock);
 	list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) {
 		struct inode *inode;
 
@@ -1770,6 +1769,7 @@ remove_lru_entry:
 			clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
 		}
 		spin_unlock(&inode->i_lock);
+		spin_unlock(&nfs_access_lru_lock);
 		iput(inode);
 		goto restart;
 	}
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 00eee87510fe..a5c82b6f3b45 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -266,7 +266,7 @@ static const struct rpc_call_ops nfs_read_direct_ops = {
 static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos)
 {
 	struct nfs_open_context *ctx = dreq->ctx;
-	struct inode *inode = ctx->dentry->d_inode;
+	struct inode *inode = ctx->path.dentry->d_inode;
 	size_t rsize = NFS_SERVER(inode)->rsize;
 	unsigned int pgbase;
 	int result;
@@ -295,9 +295,14 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo
 			break;
 		}
 		if ((unsigned)result < data->npages) {
-			nfs_direct_release_pages(data->pagevec, result);
-			nfs_readdata_release(data);
-			break;
+			bytes = result * PAGE_SIZE;
+			if (bytes <= pgbase) {
+				nfs_direct_release_pages(data->pagevec, result);
+				nfs_readdata_release(data);
+				break;
+			}
+			bytes -= pgbase;
+			data->npages = result;
 		}
 
 		get_dreq(dreq);
@@ -601,7 +606,7 @@ static const struct rpc_call_ops nfs_write_direct_ops = {
 static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync)
 {
 	struct nfs_open_context *ctx = dreq->ctx;
-	struct inode *inode = ctx->dentry->d_inode;
+	struct inode *inode = ctx->path.dentry->d_inode;
 	size_t wsize = NFS_SERVER(inode)->wsize;
 	unsigned int pgbase;
 	int result;
@@ -630,9 +635,14 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l
 			break;
 		}
 		if ((unsigned)result < data->npages) {
-			nfs_direct_release_pages(data->pagevec, result);
-			nfs_writedata_release(data);
-			break;
+			bytes = result * PAGE_SIZE;
+			if (bytes <= pgbase) {
+				nfs_direct_release_pages(data->pagevec, result);
+				nfs_writedata_release(data);
+				break;
+			}
+			bytes -= pgbase;
+			data->npages = result;
 		}
 
 		get_dreq(dreq);
@@ -763,10 +773,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
 		(unsigned long) count, (long long) pos);
 
 	if (nr_segs != 1)
-		return -EINVAL;
-
-	if (count < 0)
 		goto out;
+
 	retval = -EFAULT;
 	if (!access_ok(VERIFY_WRITE, buf, count))
 		goto out;
@@ -814,7 +822,7 @@ out:
 ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 				unsigned long nr_segs, loff_t pos)
 {
-	ssize_t retval;
+	ssize_t retval = -EINVAL;
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 	/* XXX: temporary */
@@ -827,7 +835,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 		(unsigned long) count, (long long) pos);
 
 	if (nr_segs != 1)
-		return -EINVAL;
+		goto out;
 
 	retval = generic_write_checks(file, &pos, &count, 0);
 	if (retval)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index bd9f5a836592..3d9fccf4ef93 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -461,14 +461,14 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str
 
 	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
 	if (ctx != NULL) {
-		atomic_set(&ctx->count, 1);
-		ctx->dentry = dget(dentry);
-		ctx->vfsmnt = mntget(mnt);
+		ctx->path.dentry = dget(dentry);
+		ctx->path.mnt = mntget(mnt);
 		ctx->cred = get_rpccred(cred);
 		ctx->state = NULL;
 		ctx->lockowner = current->files;
 		ctx->error = 0;
 		ctx->dir_cookie = 0;
+		kref_init(&ctx->kref);
 	}
 	return ctx;
 }
@@ -476,27 +476,33 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str
 struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
 {
 	if (ctx != NULL)
-		atomic_inc(&ctx->count);
+		kref_get(&ctx->kref);
 	return ctx;
 }
 
-void put_nfs_open_context(struct nfs_open_context *ctx)
+static void nfs_free_open_context(struct kref *kref)
 {
-	if (atomic_dec_and_test(&ctx->count)) {
-		if (!list_empty(&ctx->list)) {
-			struct inode *inode = ctx->dentry->d_inode;
-			spin_lock(&inode->i_lock);
-			list_del(&ctx->list);
-			spin_unlock(&inode->i_lock);
-		}
-		if (ctx->state != NULL)
-			nfs4_close_state(ctx->state, ctx->mode);
-		if (ctx->cred != NULL)
-			put_rpccred(ctx->cred);
-		dput(ctx->dentry);
-		mntput(ctx->vfsmnt);
-		kfree(ctx);
+	struct nfs_open_context *ctx = container_of(kref,
+			struct nfs_open_context, kref);
+
+	if (!list_empty(&ctx->list)) {
+		struct inode *inode = ctx->path.dentry->d_inode;
+		spin_lock(&inode->i_lock);
+		list_del(&ctx->list);
+		spin_unlock(&inode->i_lock);
 	}
+	if (ctx->state != NULL)
+		nfs4_close_state(&ctx->path, ctx->state, ctx->mode);
+	if (ctx->cred != NULL)
+		put_rpccred(ctx->cred);
+	dput(ctx->path.dentry);
+	mntput(ctx->path.mnt);
+	kfree(ctx);
+}
+
+void put_nfs_open_context(struct nfs_open_context *ctx)
+{
+	kref_put(&ctx->kref, nfs_free_open_context);
 }
 
 /*
@@ -961,8 +967,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 		goto out_changed;
 
 	server = NFS_SERVER(inode);
-	/* Update the fsid if and only if this is the root directory */
-	if (inode == inode->i_sb->s_root->d_inode
+	/* Update the fsid? */
+	if (S_ISDIR(inode->i_mode)
 			&& !nfs_fsid_equal(&server->fsid, &fattr->fsid))
 		server->fsid = fattr->fsid;
 
@@ -1066,8 +1072,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 		invalid &= ~NFS_INO_INVALID_DATA;
 	if (data_stable)
 		invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME|NFS_INO_REVAL_PAGECACHE);
-	if (!nfs_have_delegation(inode, FMODE_READ))
+	if (!nfs_have_delegation(inode, FMODE_READ) ||
+			(nfsi->cache_validity & NFS_INO_REVAL_FORCED))
 		nfsi->cache_validity |= invalid;
+	nfsi->cache_validity &= ~NFS_INO_REVAL_FORCED;
 
 	return 0;
  out_changed:
@@ -1103,27 +1111,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
  */
 void nfs4_clear_inode(struct inode *inode)
 {
-	struct nfs_inode *nfsi = NFS_I(inode);
-
 	/* If we are holding a delegation, return it! */
 	nfs_inode_return_delegation(inode);
 	/* First call standard NFS clear_inode() code */
 	nfs_clear_inode(inode);
-	/* Now clear out any remaining state */
-	while (!list_empty(&nfsi->open_states)) {
-		struct nfs4_state *state;
-		
-		state = list_entry(nfsi->open_states.next,
-				struct nfs4_state,
-				inode_states);
-		dprintk("%s(%s/%Ld): found unclaimed NFSv4 state %p\n",
-				__FUNCTION__,
-				inode->i_sb->s_id,
-				(long long)NFS_FILEID(inode),
-				state);
-		BUG_ON(atomic_read(&state->count) != 1);
-		nfs4_close_state(state, state->state);
-	}
 }
 #endif
 
@@ -1165,15 +1156,11 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
 	struct nfs_inode *nfsi = (struct nfs_inode *) foo;
 
 	inode_init_once(&nfsi->vfs_inode);
-	spin_lock_init(&nfsi->req_lock);
-	INIT_LIST_HEAD(&nfsi->dirty);
-	INIT_LIST_HEAD(&nfsi->commit);
 	INIT_LIST_HEAD(&nfsi->open_files);
 	INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
 	INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
 	INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
 	atomic_set(&nfsi->data_updates, 0);
-	nfsi->ndirty = 0;
 	nfsi->ncommit = 0;
 	nfsi->npages = 0;
 	nfs4_init_once(nfsi);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index ad2b40db1e65..76cf55d57101 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -183,9 +183,9 @@ unsigned long nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp)
 /*
  * Calculate the number of 512byte blocks used.
  */
-static inline unsigned long nfs_calc_block_size(u64 tsize)
+static inline blkcnt_t nfs_calc_block_size(u64 tsize)
 {
-	loff_t used = (tsize + 511) >> 9;
+	blkcnt_t used = (tsize + 511) >> 9;
 	return (used > ULONG_MAX) ? ULONG_MAX : used;
 }
 
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index ca5a266a3140..8afd9f7e7a97 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -1,7 +1,5 @@
 /*
- * linux/fs/nfs/mount_clnt.c
- *
- * MOUNT client to support NFSroot.
+ * In-kernel MOUNT protocol client
  *
  * Copyright (C) 1997, Olaf Kirch <okir@monad.swb.de>
  */
@@ -18,33 +16,31 @@
 #include <linux/nfs_fs.h>
 
 #ifdef RPC_DEBUG
-# define NFSDBG_FACILITY	NFSDBG_ROOT
+# define NFSDBG_FACILITY	NFSDBG_MOUNT
 #endif
 
-/*
-#define MOUNT_PROGRAM		100005
-#define MOUNT_VERSION		1
-#define MOUNT_MNT		1
-#define MOUNT_UMNT		3
- */
-
-static struct rpc_clnt *	mnt_create(char *, struct sockaddr_in *,
-								int, int);
 static struct rpc_program	mnt_program;
 
 struct mnt_fhstatus {
-	unsigned int		status;
-	struct nfs_fh *		fh;
+	u32 status;
+	struct nfs_fh *fh;
 };
 
-/*
- * Obtain an NFS file handle for the given host and path
+/**
+ * nfs_mount - Obtain an NFS file handle for the given host and path
+ * @addr: pointer to server's address
+ * @len: size of server's address
+ * @hostname: name of server host, or NULL
+ * @path: pointer to string containing export path to mount
+ * @version: mount version to use for this request
+ * @protocol: transport protocol to use for thie request
+ * @fh: pointer to location to place returned file handle
+ *
+ * Uses default timeout parameters specified by underlying transport.
  */
-int
-nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
-		int version, int protocol)
+int nfs_mount(struct sockaddr *addr, size_t len, char *hostname, char *path,
+	      int version, int protocol, struct nfs_fh *fh)
 {
-	struct rpc_clnt		*mnt_clnt;
 	struct mnt_fhstatus	result = {
 		.fh		= fh
 	};
@@ -52,16 +48,25 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
 		.rpc_argp	= path,
 		.rpc_resp	= &result,
 	};
-	char			hostname[32];
+	struct rpc_create_args args = {
+		.protocol	= protocol,
+		.address	= addr,
+		.addrsize	= len,
+		.servername	= hostname,
+		.program	= &mnt_program,
+		.version	= version,
+		.authflavor	= RPC_AUTH_UNIX,
+		.flags		= RPC_CLNT_CREATE_INTR,
+	};
+	struct rpc_clnt		*mnt_clnt;
 	int			status;
 
-	dprintk("NFS:      nfs_mount(%08x:%s)\n",
-			(unsigned)ntohl(addr->sin_addr.s_addr), path);
+	dprintk("NFS: sending MNT request for %s:%s\n",
+		(hostname ? hostname : "server"), path);
 
-	sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr->sin_addr.s_addr));
-	mnt_clnt = mnt_create(hostname, addr, version, protocol);
+	mnt_clnt = rpc_create(&args);
 	if (IS_ERR(mnt_clnt))
-		return PTR_ERR(mnt_clnt);
+		goto out_clnt_err;
 
 	if (version == NFS_MNT3_VERSION)
 		msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT];
@@ -69,33 +74,39 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
 		msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT];
 
 	status = rpc_call_sync(mnt_clnt, &msg, 0);
-	return status < 0? status : (result.status? -EACCES : 0);
-}
+	rpc_shutdown_client(mnt_clnt);
 
-static struct rpc_clnt *
-mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version,
-		int protocol)
-{
-	struct rpc_create_args args = {
-		.protocol	= protocol,
-		.address	= (struct sockaddr *)srvaddr,
-		.addrsize	= sizeof(*srvaddr),
-		.servername	= hostname,
-		.program	= &mnt_program,
-		.version	= version,
-		.authflavor	= RPC_AUTH_UNIX,
-		.flags		= (RPC_CLNT_CREATE_ONESHOT |
-				   RPC_CLNT_CREATE_INTR),
-	};
+	if (status < 0)
+		goto out_call_err;
+	if (result.status != 0)
+		goto out_mnt_err;
+
+	dprintk("NFS: MNT request succeeded\n");
+	status = 0;
+
+out:
+	return status;
+
+out_clnt_err:
+	status = PTR_ERR(mnt_clnt);
+	dprintk("NFS: failed to create RPC client, status=%d\n", status);
+	goto out;
+
+out_call_err:
+	dprintk("NFS: failed to start MNT request, status=%d\n", status);
+	goto out;
 
-	return rpc_create(&args);
+out_mnt_err:
+	dprintk("NFS: MNT server returned result %d\n", result.status);
+	status = -EACCES;
+	goto out;
 }
 
 /*
  * XDR encode/decode functions for MOUNT
  */
-static int
-xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p, const char *path)
+static int xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p,
+			      const char *path)
 {
 	p = xdr_encode_string(p, path);
 
@@ -103,8 +114,8 @@ xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p, const char *path)
 	return 0;
 }
 
-static int
-xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res)
+static int xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p,
+			       struct mnt_fhstatus *res)
 {
 	struct nfs_fh *fh = res->fh;
 
@@ -115,8 +126,8 @@ xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res)
 	return 0;
 }
 
-static int
-xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res)
+static int xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p,
+				struct mnt_fhstatus *res)
 {
 	struct nfs_fh *fh = res->fh;
 
@@ -135,53 +146,53 @@ xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res)
 #define MNT_fhstatus_sz		(1 + 8)
 #define MNT_fhstatus3_sz	(1 + 16)
 
-static struct rpc_procinfo	mnt_procedures[] = {
-[MNTPROC_MNT] = {
-	  .p_proc		= MNTPROC_MNT,
-	  .p_encode		= (kxdrproc_t) xdr_encode_dirpath,	
-	  .p_decode		= (kxdrproc_t) xdr_decode_fhstatus,
-	  .p_arglen		= MNT_dirpath_sz,
-	  .p_replen		= MNT_fhstatus_sz,
-	  .p_statidx		= MNTPROC_MNT,
-	  .p_name		= "MOUNT",
+static struct rpc_procinfo mnt_procedures[] = {
+	[MNTPROC_MNT] = {
+		.p_proc		= MNTPROC_MNT,
+		.p_encode	= (kxdrproc_t) xdr_encode_dirpath,
+		.p_decode	= (kxdrproc_t) xdr_decode_fhstatus,
+		.p_arglen	= MNT_dirpath_sz,
+		.p_replen	= MNT_fhstatus_sz,
+		.p_statidx	= MNTPROC_MNT,
+		.p_name		= "MOUNT",
 	},
 };
 
 static struct rpc_procinfo mnt3_procedures[] = {
-[MOUNTPROC3_MNT] = {
-	  .p_proc		= MOUNTPROC3_MNT,
-	  .p_encode		= (kxdrproc_t) xdr_encode_dirpath,
-	  .p_decode		= (kxdrproc_t) xdr_decode_fhstatus3,
-	  .p_arglen		= MNT_dirpath_sz,
-	  .p_replen		= MNT_fhstatus3_sz,
-	  .p_statidx		= MOUNTPROC3_MNT,
-	  .p_name		= "MOUNT",
+	[MOUNTPROC3_MNT] = {
+		.p_proc		= MOUNTPROC3_MNT,
+		.p_encode	= (kxdrproc_t) xdr_encode_dirpath,
+		.p_decode	= (kxdrproc_t) xdr_decode_fhstatus3,
+		.p_arglen	= MNT_dirpath_sz,
+		.p_replen	= MNT_fhstatus3_sz,
+		.p_statidx	= MOUNTPROC3_MNT,
+		.p_name		= "MOUNT",
 	},
 };
 
 
-static struct rpc_version	mnt_version1 = {
-		.number		= 1,
-		.nrprocs 	= 2,
-		.procs 		= mnt_procedures
+static struct rpc_version mnt_version1 = {
+	.number		= 1,
+	.nrprocs	= 2,
+	.procs		= mnt_procedures,
 };
 
-static struct rpc_version       mnt_version3 = {
-		.number		= 3,
-		.nrprocs	= 2,
-		.procs		= mnt3_procedures
+static struct rpc_version mnt_version3 = {
+	.number		= 3,
+	.nrprocs	= 2,
+	.procs		= mnt3_procedures,
 };
 
-static struct rpc_version *	mnt_version[] = {
+static struct rpc_version *mnt_version[] = {
 	NULL,
 	&mnt_version1,
 	NULL,
 	&mnt_version3,
 };
 
-static struct rpc_stat		mnt_stats;
+static struct rpc_stat mnt_stats;
 
-static struct rpc_program	mnt_program = {
+static struct rpc_program mnt_program = {
 	.name		= "mount",
 	.number		= NFS_MNT_PROGRAM,
 	.nrvers		= ARRAY_SIZE(mnt_version),
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index cd3ca7b5d3db..7fcc78f2aa71 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -223,7 +223,7 @@ nfs_xdr_diropargs(struct rpc_rqst *req, __be32 *p, struct nfs_diropargs *args)
 static int
 nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
 {
-	struct rpc_auth	*auth = req->rq_task->tk_auth;
+	struct rpc_auth	*auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
 	unsigned int replen;
 	u32 offset = (u32)args->offset;
 	u32 count = args->count;
@@ -380,7 +380,7 @@ static int
 nfs_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs_readdirargs *args)
 {
 	struct rpc_task	*task = req->rq_task;
-	struct rpc_auth	*auth = task->tk_auth;
+	struct rpc_auth	*auth = task->tk_msg.rpc_cred->cr_auth;
 	unsigned int replen;
 	u32 count = args->count;
 
@@ -541,7 +541,7 @@ nfs_xdr_diropres(struct rpc_rqst *req, __be32 *p, struct nfs_diropok *res)
 static int
 nfs_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_readlinkargs *args)
 {
-	struct rpc_auth *auth = req->rq_task->tk_auth;
+	struct rpc_auth	*auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
 	unsigned int replen;
 
 	p = xdr_encode_fhandle(p, args->fh);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 45268d6def2e..814d886b6aa4 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -335,9 +335,7 @@ again:
 		 * not sure this buys us anything (and I'd have
 		 * to revamp the NFSv3 XDR code) */
 		status = nfs3_proc_setattr(dentry, &fattr, sattr);
-		if (status == 0)
-			nfs_setattr_update_inode(dentry->d_inode, sattr);
-		nfs_refresh_inode(dentry->d_inode, &fattr);
+		nfs_post_op_update_inode(dentry->d_inode, &fattr);
 		dprintk("NFS reply setattr (post-create): %d\n", status);
 	}
 	if (status != 0)
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index b51df8eb9f01..b4647a22f349 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -319,7 +319,7 @@ nfs3_xdr_accessargs(struct rpc_rqst *req, __be32 *p, struct nfs3_accessargs *arg
 static int
 nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
 {
-	struct rpc_auth	*auth = req->rq_task->tk_auth;
+	struct rpc_auth	*auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
 	unsigned int replen;
 	u32 count = args->count;
 
@@ -458,7 +458,7 @@ nfs3_xdr_linkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_linkargs *args)
 static int
 nfs3_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirargs *args)
 {
-	struct rpc_auth	*auth = req->rq_task->tk_auth;
+	struct rpc_auth	*auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
 	unsigned int replen;
 	u32 count = args->count;
 
@@ -643,7 +643,7 @@ static int
 nfs3_xdr_getaclargs(struct rpc_rqst *req, __be32 *p,
 		    struct nfs3_getaclargs *args)
 {
-	struct rpc_auth *auth = req->rq_task->tk_auth;
+	struct rpc_auth	*auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
 	unsigned int replen;
 
 	p = xdr_encode_fhandle(p, args->fh);
@@ -773,7 +773,7 @@ nfs3_xdr_accessres(struct rpc_rqst *req, __be32 *p, struct nfs3_accessres *res)
 static int
 nfs3_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readlinkargs *args)
 {
-	struct rpc_auth *auth = req->rq_task->tk_auth;
+	struct rpc_auth	*auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
 	unsigned int replen;
 
 	p = xdr_encode_fhandle(p, args->fh);
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index cf3a17eb5c09..6c028e734fe6 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -70,19 +70,26 @@ static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status
 		seqid->flags |= NFS_SEQID_CONFIRMED;
 }
 
+struct nfs_unique_id {
+	struct rb_node rb_node;
+	__u64 id;
+};
+
 /*
  * NFS4 state_owners and lock_owners are simply labels for ordered
  * sequences of RPC calls. Their sole purpose is to provide once-only
  * semantics by allowing the server to identify replayed requests.
  */
 struct nfs4_state_owner {
-	spinlock_t	     so_lock;
-	struct list_head     so_list;	 /* per-clientid list of state_owners */
+	struct nfs_unique_id so_owner_id;
 	struct nfs_client    *so_client;
-	u32                  so_id;      /* 32-bit identifier, unique */
-	atomic_t	     so_count;
+	struct nfs_server    *so_server;
+	struct rb_node	     so_client_node;
 
 	struct rpc_cred	     *so_cred;	 /* Associated cred */
+
+	spinlock_t	     so_lock;
+	atomic_t	     so_count;
 	struct list_head     so_states;
 	struct list_head     so_delegations;
 	struct nfs_seqid_counter so_seqid;
@@ -108,7 +115,7 @@ struct nfs4_lock_state {
 #define NFS_LOCK_INITIALIZED 1
 	int			ls_flags;
 	struct nfs_seqid_counter	ls_seqid;
-	u32			ls_id;
+	struct nfs_unique_id	ls_id;
 	nfs4_stateid		ls_stateid;
 	atomic_t		ls_count;
 };
@@ -116,7 +123,10 @@ struct nfs4_lock_state {
 /* bits for nfs4_state->flags */
 enum {
 	LK_STATE_IN_USE,
-	NFS_DELEGATED_STATE,
+	NFS_DELEGATED_STATE,		/* Current stateid is delegation */
+	NFS_O_RDONLY_STATE,		/* OPEN stateid has read-only state */
+	NFS_O_WRONLY_STATE,		/* OPEN stateid has write-only state */
+	NFS_O_RDWR_STATE,		/* OPEN stateid has read/write state */
 };
 
 struct nfs4_state {
@@ -130,11 +140,14 @@ struct nfs4_state {
 	unsigned long flags;		/* Do we hold any locks? */
 	spinlock_t state_lock;		/* Protects the lock_states list */
 
-	nfs4_stateid stateid;
+	seqlock_t seqlock;		/* Protects the stateid/open_stateid */
+	nfs4_stateid stateid;		/* Current stateid: may be delegation */
+	nfs4_stateid open_stateid;	/* OPEN stateid */
 
-	unsigned int n_rdonly;
-	unsigned int n_wronly;
-	unsigned int n_rdwr;
+	/* The following 3 fields are protected by owner->so_lock */
+	unsigned int n_rdonly;		/* Number of read-only references */
+	unsigned int n_wronly;		/* Number of write-only references */
+	unsigned int n_rdwr;		/* Number of read/write references */
 	int state;			/* State on the server (R,W, or RW) */
 	atomic_t count;
 };
@@ -165,7 +178,7 @@ extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struc
 extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *);
 extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
 extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
-extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state);
+extern int nfs4_do_close(struct path *path, struct nfs4_state *state);
 extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
 extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
 extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
@@ -189,14 +202,13 @@ extern void nfs4_renew_state(struct work_struct *);
 
 /* nfs4state.c */
 struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp);
-extern u32 nfs4_alloc_lockowner_id(struct nfs_client *);
 
 extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
 extern void nfs4_put_state_owner(struct nfs4_state_owner *);
 extern void nfs4_drop_state_owner(struct nfs4_state_owner *);
 extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
 extern void nfs4_put_open_state(struct nfs4_state *);
-extern void nfs4_close_state(struct nfs4_state *, mode_t);
+extern void nfs4_close_state(struct path *, struct nfs4_state *, mode_t);
 extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t);
 extern void nfs4_schedule_state_recovery(struct nfs_client *);
 extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
@@ -222,7 +234,7 @@ extern struct svc_version nfs4_callback_version1;
 
 #else
 
-#define nfs4_close_state(a, b) do { } while (0)
+#define nfs4_close_state(a, b, c) do { } while (0)
 
 #endif /* CONFIG_NFS_V4 */
 #endif /* __LINUX_FS_NFS_NFS4_FS.H */
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 648e0ac0f90e..fee2da856c95 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -65,6 +65,7 @@ static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *)
 static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry);
 static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception);
 static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp);
+static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags);
 
 /* Prevent leaks of NFSv4 errors into userland */
 int nfs4_map_errors(int err)
@@ -214,27 +215,39 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
 }
 
 struct nfs4_opendata {
-	atomic_t count;
+	struct kref kref;
 	struct nfs_openargs o_arg;
 	struct nfs_openres o_res;
 	struct nfs_open_confirmargs c_arg;
 	struct nfs_open_confirmres c_res;
 	struct nfs_fattr f_attr;
 	struct nfs_fattr dir_attr;
-	struct dentry *dentry;
+	struct path path;
 	struct dentry *dir;
 	struct nfs4_state_owner *owner;
+	struct nfs4_state *state;
 	struct iattr attrs;
 	unsigned long timestamp;
+	unsigned int rpc_done : 1;
 	int rpc_status;
 	int cancelled;
 };
 
-static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
+
+static void nfs4_init_opendata_res(struct nfs4_opendata *p)
+{
+	p->o_res.f_attr = &p->f_attr;
+	p->o_res.dir_attr = &p->dir_attr;
+	p->o_res.server = p->o_arg.server;
+	nfs_fattr_init(&p->f_attr);
+	nfs_fattr_init(&p->dir_attr);
+}
+
+static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
 		struct nfs4_state_owner *sp, int flags,
 		const struct iattr *attrs)
 {
-	struct dentry *parent = dget_parent(dentry);
+	struct dentry *parent = dget_parent(path->dentry);
 	struct inode *dir = parent->d_inode;
 	struct nfs_server *server = NFS_SERVER(dir);
 	struct nfs4_opendata *p;
@@ -245,24 +258,19 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
 	p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
 	if (p->o_arg.seqid == NULL)
 		goto err_free;
-	atomic_set(&p->count, 1);
-	p->dentry = dget(dentry);
+	p->path.mnt = mntget(path->mnt);
+	p->path.dentry = dget(path->dentry);
 	p->dir = parent;
 	p->owner = sp;
 	atomic_inc(&sp->so_count);
 	p->o_arg.fh = NFS_FH(dir);
 	p->o_arg.open_flags = flags,
 	p->o_arg.clientid = server->nfs_client->cl_clientid;
-	p->o_arg.id = sp->so_id;
-	p->o_arg.name = &dentry->d_name;
+	p->o_arg.id = sp->so_owner_id.id;
+	p->o_arg.name = &p->path.dentry->d_name;
 	p->o_arg.server = server;
 	p->o_arg.bitmask = server->attr_bitmask;
 	p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
-	p->o_res.f_attr = &p->f_attr;
-	p->o_res.dir_attr = &p->dir_attr;
-	p->o_res.server = server;
-	nfs_fattr_init(&p->f_attr);
-	nfs_fattr_init(&p->dir_attr);
 	if (flags & O_EXCL) {
 		u32 *s = (u32 *) p->o_arg.u.verifier.data;
 		s[0] = jiffies;
@@ -274,6 +282,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
 	p->c_arg.fh = &p->o_res.fh;
 	p->c_arg.stateid = &p->o_res.stateid;
 	p->c_arg.seqid = p->o_arg.seqid;
+	nfs4_init_opendata_res(p);
+	kref_init(&p->kref);
 	return p;
 err_free:
 	kfree(p);
@@ -282,27 +292,25 @@ err:
 	return NULL;
 }
 
-static void nfs4_opendata_free(struct nfs4_opendata *p)
+static void nfs4_opendata_free(struct kref *kref)
 {
-	if (p != NULL && atomic_dec_and_test(&p->count)) {
-		nfs_free_seqid(p->o_arg.seqid);
-		nfs4_put_state_owner(p->owner);
-		dput(p->dir);
-		dput(p->dentry);
-		kfree(p);
-	}
+	struct nfs4_opendata *p = container_of(kref,
+			struct nfs4_opendata, kref);
+
+	nfs_free_seqid(p->o_arg.seqid);
+	if (p->state != NULL)
+		nfs4_put_open_state(p->state);
+	nfs4_put_state_owner(p->owner);
+	dput(p->dir);
+	dput(p->path.dentry);
+	mntput(p->path.mnt);
+	kfree(p);
 }
 
-/* Helper for asynchronous RPC calls */
-static int nfs4_call_async(struct rpc_clnt *clnt,
-		const struct rpc_call_ops *tk_ops, void *calldata)
+static void nfs4_opendata_put(struct nfs4_opendata *p)
 {
-	struct rpc_task *task;
-
-	if (!(task = rpc_new_task(clnt, RPC_TASK_ASYNC, tk_ops, calldata)))
-		return -ENOMEM;
-	rpc_execute(task);
-	return 0;
+	if (p != NULL)
+		kref_put(&p->kref, nfs4_opendata_free);
 }
 
 static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task)
@@ -316,7 +324,34 @@ static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task)
 	return ret;
 }
 
-static inline void update_open_stateflags(struct nfs4_state *state, mode_t open_flags)
+static int can_open_cached(struct nfs4_state *state, int mode)
+{
+	int ret = 0;
+	switch (mode & (FMODE_READ|FMODE_WRITE|O_EXCL)) {
+		case FMODE_READ:
+			ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0;
+			ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
+			break;
+		case FMODE_WRITE:
+			ret |= test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0;
+			ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
+			break;
+		case FMODE_READ|FMODE_WRITE:
+			ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
+	}
+	return ret;
+}
+
+static int can_open_delegated(struct nfs_delegation *delegation, mode_t open_flags)
+{
+	if ((delegation->type & open_flags) != open_flags)
+		return 0;
+	if (delegation->flags & NFS_DELEGATION_NEED_RECLAIM)
+		return 0;
+	return 1;
+}
+
+static void update_open_stateflags(struct nfs4_state *state, mode_t open_flags)
 {
 	switch (open_flags) {
 		case FMODE_WRITE:
@@ -328,41 +363,176 @@ static inline void update_open_stateflags(struct nfs4_state *state, mode_t open_
 		case FMODE_READ|FMODE_WRITE:
 			state->n_rdwr++;
 	}
+	nfs4_state_set_mode_locked(state, state->state | open_flags);
 }
 
-static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags)
+static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags)
 {
-	struct inode *inode = state->inode;
+	if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
+		memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data));
+	memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data));
+	switch (open_flags) {
+		case FMODE_READ:
+			set_bit(NFS_O_RDONLY_STATE, &state->flags);
+			break;
+		case FMODE_WRITE:
+			set_bit(NFS_O_WRONLY_STATE, &state->flags);
+			break;
+		case FMODE_READ|FMODE_WRITE:
+			set_bit(NFS_O_RDWR_STATE, &state->flags);
+	}
+}
+
+static void nfs_set_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags)
+{
+	write_seqlock(&state->seqlock);
+	nfs_set_open_stateid_locked(state, stateid, open_flags);
+	write_sequnlock(&state->seqlock);
+}
 
+static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, nfs4_stateid *deleg_stateid, int open_flags)
+{
 	open_flags &= (FMODE_READ|FMODE_WRITE);
-	/* Protect against nfs4_find_state_byowner() */
+	/*
+	 * Protect the call to nfs4_state_set_mode_locked and
+	 * serialise the stateid update
+	 */
+	write_seqlock(&state->seqlock);
+	if (deleg_stateid != NULL) {
+		memcpy(state->stateid.data, deleg_stateid->data, sizeof(state->stateid.data));
+		set_bit(NFS_DELEGATED_STATE, &state->flags);
+	}
+	if (open_stateid != NULL)
+		nfs_set_open_stateid_locked(state, open_stateid, open_flags);
+	write_sequnlock(&state->seqlock);
 	spin_lock(&state->owner->so_lock);
-	spin_lock(&inode->i_lock);
-	memcpy(&state->stateid, stateid, sizeof(state->stateid));
 	update_open_stateflags(state, open_flags);
-	nfs4_state_set_mode_locked(state, state->state | open_flags);
-	spin_unlock(&inode->i_lock);
 	spin_unlock(&state->owner->so_lock);
 }
 
+static void nfs4_return_incompatible_delegation(struct inode *inode, mode_t open_flags)
+{
+	struct nfs_delegation *delegation;
+
+	rcu_read_lock();
+	delegation = rcu_dereference(NFS_I(inode)->delegation);
+	if (delegation == NULL || (delegation->type & open_flags) == open_flags) {
+		rcu_read_unlock();
+		return;
+	}
+	rcu_read_unlock();
+	nfs_inode_return_delegation(inode);
+}
+
+static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
+{
+	struct nfs4_state *state = opendata->state;
+	struct nfs_inode *nfsi = NFS_I(state->inode);
+	struct nfs_delegation *delegation;
+	int open_mode = opendata->o_arg.open_flags & (FMODE_READ|FMODE_WRITE|O_EXCL);
+	nfs4_stateid stateid;
+	int ret = -EAGAIN;
+
+	rcu_read_lock();
+	delegation = rcu_dereference(nfsi->delegation);
+	for (;;) {
+		if (can_open_cached(state, open_mode)) {
+			spin_lock(&state->owner->so_lock);
+			if (can_open_cached(state, open_mode)) {
+				update_open_stateflags(state, open_mode);
+				spin_unlock(&state->owner->so_lock);
+				rcu_read_unlock();
+				goto out_return_state;
+			}
+			spin_unlock(&state->owner->so_lock);
+		}
+		if (delegation == NULL)
+			break;
+		if (!can_open_delegated(delegation, open_mode))
+			break;
+		/* Save the delegation */
+		memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data));
+		rcu_read_unlock();
+		lock_kernel();
+		ret = _nfs4_do_access(state->inode, state->owner->so_cred, open_mode);
+		unlock_kernel();
+		if (ret != 0)
+			goto out;
+		ret = -EAGAIN;
+		rcu_read_lock();
+		delegation = rcu_dereference(nfsi->delegation);
+		/* If no delegation, try a cached open */
+		if (delegation == NULL)
+			continue;
+		/* Is the delegation still valid? */
+		if (memcmp(stateid.data, delegation->stateid.data, sizeof(stateid.data)) != 0)
+			continue;
+		rcu_read_unlock();
+		update_open_stateid(state, NULL, &stateid, open_mode);
+		goto out_return_state;
+	}
+	rcu_read_unlock();
+out:
+	return ERR_PTR(ret);
+out_return_state:
+	atomic_inc(&state->count);
+	return state;
+}
+
 static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
 {
 	struct inode *inode;
 	struct nfs4_state *state = NULL;
+	struct nfs_delegation *delegation;
+	nfs4_stateid *deleg_stateid = NULL;
+	int ret;
 
-	if (!(data->f_attr.valid & NFS_ATTR_FATTR))
+	if (!data->rpc_done) {
+		state = nfs4_try_open_cached(data);
 		goto out;
+	}
+
+	ret = -EAGAIN;
+	if (!(data->f_attr.valid & NFS_ATTR_FATTR))
+		goto err;
 	inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr);
+	ret = PTR_ERR(inode);
 	if (IS_ERR(inode))
-		goto out;
+		goto err;
+	ret = -ENOMEM;
 	state = nfs4_get_open_state(inode, data->owner);
 	if (state == NULL)
-		goto put_inode;
-	update_open_stateid(state, &data->o_res.stateid, data->o_arg.open_flags);
-put_inode:
+		goto err_put_inode;
+	if (data->o_res.delegation_type != 0) {
+		int delegation_flags = 0;
+
+		rcu_read_lock();
+		delegation = rcu_dereference(NFS_I(inode)->delegation);
+		if (delegation)
+			delegation_flags = delegation->flags;
+		rcu_read_unlock();
+		if (!(delegation_flags & NFS_DELEGATION_NEED_RECLAIM))
+			nfs_inode_set_delegation(state->inode,
+					data->owner->so_cred,
+					&data->o_res);
+		else
+			nfs_inode_reclaim_delegation(state->inode,
+					data->owner->so_cred,
+					&data->o_res);
+	}
+	rcu_read_lock();
+	delegation = rcu_dereference(NFS_I(inode)->delegation);
+	if (delegation != NULL)
+		deleg_stateid = &delegation->stateid;
+	update_open_stateid(state, &data->o_res.stateid, deleg_stateid, data->o_arg.open_flags);
+	rcu_read_unlock();
 	iput(inode);
 out:
 	return state;
+err_put_inode:
+	iput(inode);
+err:
+	return ERR_PTR(ret);
 }
 
 static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state)
@@ -382,79 +552,66 @@ static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *
 	return ERR_PTR(-ENOENT);
 }
 
-static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openflags, nfs4_stateid *stateid)
+static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openflags, struct nfs4_state **res)
 {
+	struct nfs4_state *newstate;
 	int ret;
 
 	opendata->o_arg.open_flags = openflags;
+	memset(&opendata->o_res, 0, sizeof(opendata->o_res));
+	memset(&opendata->c_res, 0, sizeof(opendata->c_res));
+	nfs4_init_opendata_res(opendata);
 	ret = _nfs4_proc_open(opendata);
 	if (ret != 0)
 		return ret; 
-	memcpy(stateid->data, opendata->o_res.stateid.data,
-			sizeof(stateid->data));
+	newstate = nfs4_opendata_to_nfs4_state(opendata);
+	if (IS_ERR(newstate))
+		return PTR_ERR(newstate);
+	nfs4_close_state(&opendata->path, newstate, openflags);
+	*res = newstate;
 	return 0;
 }
 
 static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *state)
 {
-	nfs4_stateid stateid;
 	struct nfs4_state *newstate;
-	int mode = 0;
-	int delegation = 0;
 	int ret;
 
 	/* memory barrier prior to reading state->n_* */
+	clear_bit(NFS_DELEGATED_STATE, &state->flags);
 	smp_rmb();
 	if (state->n_rdwr != 0) {
-		ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &stateid);
+		ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate);
 		if (ret != 0)
 			return ret;
-		mode |= FMODE_READ|FMODE_WRITE;
-		if (opendata->o_res.delegation_type != 0)
-			delegation = opendata->o_res.delegation_type;
-		smp_rmb();
+		if (newstate != state)
+			return -ESTALE;
 	}
 	if (state->n_wronly != 0) {
-		ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &stateid);
+		ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate);
 		if (ret != 0)
 			return ret;
-		mode |= FMODE_WRITE;
-		if (opendata->o_res.delegation_type != 0)
-			delegation = opendata->o_res.delegation_type;
-		smp_rmb();
+		if (newstate != state)
+			return -ESTALE;
 	}
 	if (state->n_rdonly != 0) {
-		ret = nfs4_open_recover_helper(opendata, FMODE_READ, &stateid);
+		ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate);
 		if (ret != 0)
 			return ret;
-		mode |= FMODE_READ;
+		if (newstate != state)
+			return -ESTALE;
 	}
-	clear_bit(NFS_DELEGATED_STATE, &state->flags);
-	if (mode == 0)
-		return 0;
-	if (opendata->o_res.delegation_type == 0)
-		opendata->o_res.delegation_type = delegation;
-	opendata->o_arg.open_flags |= mode;
-	newstate = nfs4_opendata_to_nfs4_state(opendata);
-	if (newstate != NULL) {
-		if (opendata->o_res.delegation_type != 0) {
-			struct nfs_inode *nfsi = NFS_I(newstate->inode);
-			int delegation_flags = 0;
-			if (nfsi->delegation)
-				delegation_flags = nfsi->delegation->flags;
-			if (!(delegation_flags & NFS_DELEGATION_NEED_RECLAIM))
-				nfs_inode_set_delegation(newstate->inode,
-						opendata->owner->so_cred,
-						&opendata->o_res);
-			else
-				nfs_inode_reclaim_delegation(newstate->inode,
-						opendata->owner->so_cred,
-						&opendata->o_res);
-		}
-		nfs4_close_state(newstate, opendata->o_arg.open_flags);
+	/*
+	 * We may have performed cached opens for all three recoveries.
+	 * Check if we need to update the current stateid.
+	 */
+	if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0 &&
+	    memcmp(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)) != 0) {
+		write_seqlock(&state->seqlock);
+		if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
+			memcpy(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data));
+		write_sequnlock(&state->seqlock);
 	}
-	if (newstate != state)
-		return -ESTALE;
 	return 0;
 }
 
@@ -462,41 +619,37 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
  * OPEN_RECLAIM:
  * 	reclaim state on the server after a reboot.
  */
-static int _nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
+static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state)
 {
-	struct nfs_delegation *delegation = NFS_I(state->inode)->delegation;
+	struct nfs_delegation *delegation;
 	struct nfs4_opendata *opendata;
 	int delegation_type = 0;
 	int status;
 
-	if (delegation != NULL) {
-		if (!(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) {
-			memcpy(&state->stateid, &delegation->stateid,
-					sizeof(state->stateid));
-			set_bit(NFS_DELEGATED_STATE, &state->flags);
-			return 0;
-		}
-		delegation_type = delegation->type;
-	}
-	opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL);
+	opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, NULL);
 	if (opendata == NULL)
 		return -ENOMEM;
 	opendata->o_arg.claim = NFS4_OPEN_CLAIM_PREVIOUS;
 	opendata->o_arg.fh = NFS_FH(state->inode);
 	nfs_copy_fh(&opendata->o_res.fh, opendata->o_arg.fh);
+	rcu_read_lock();
+	delegation = rcu_dereference(NFS_I(state->inode)->delegation);
+	if (delegation != NULL && (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) != 0)
+		delegation_type = delegation->flags;
+	rcu_read_unlock();
 	opendata->o_arg.u.delegation_type = delegation_type;
 	status = nfs4_open_recover(opendata, state);
-	nfs4_opendata_free(opendata);
+	nfs4_opendata_put(opendata);
 	return status;
 }
 
-static int nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
+static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state)
 {
 	struct nfs_server *server = NFS_SERVER(state->inode);
 	struct nfs4_exception exception = { };
 	int err;
 	do {
-		err = _nfs4_do_open_reclaim(sp, state, dentry);
+		err = _nfs4_do_open_reclaim(ctx, state);
 		if (err != -NFS4ERR_DELAY)
 			break;
 		nfs4_handle_exception(server, err, &exception);
@@ -512,37 +665,35 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta
 	ctx = nfs4_state_find_open_context(state);
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
-	ret = nfs4_do_open_reclaim(sp, state, ctx->dentry);
+	ret = nfs4_do_open_reclaim(ctx, state);
 	put_nfs_open_context(ctx);
 	return ret;
 }
 
-static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
+static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
 {
 	struct nfs4_state_owner  *sp  = state->owner;
 	struct nfs4_opendata *opendata;
 	int ret;
 
-	if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
-		return 0;
-	opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL);
+	opendata = nfs4_opendata_alloc(&ctx->path, sp, 0, NULL);
 	if (opendata == NULL)
 		return -ENOMEM;
 	opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR;
-	memcpy(opendata->o_arg.u.delegation.data, state->stateid.data,
+	memcpy(opendata->o_arg.u.delegation.data, stateid->data,
 			sizeof(opendata->o_arg.u.delegation.data));
 	ret = nfs4_open_recover(opendata, state);
-	nfs4_opendata_free(opendata);
+	nfs4_opendata_put(opendata);
 	return ret;
 }
 
-int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
+int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
 {
 	struct nfs4_exception exception = { };
-	struct nfs_server *server = NFS_SERVER(dentry->d_inode);
+	struct nfs_server *server = NFS_SERVER(state->inode);
 	int err;
 	do {
-		err = _nfs4_open_delegation_recall(dentry, state);
+		err = _nfs4_open_delegation_recall(ctx, state, stateid);
 		switch (err) {
 			case 0:
 				return err;
@@ -582,9 +733,10 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
 		memcpy(data->o_res.stateid.data, data->c_res.stateid.data,
 				sizeof(data->o_res.stateid.data));
 		renew_lease(data->o_res.server, data->timestamp);
+		data->rpc_done = 1;
 	}
-	nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid);
 	nfs_confirm_seqid(&data->owner->so_seqid, data->rpc_status);
+	nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid);
 }
 
 static void nfs4_open_confirm_release(void *calldata)
@@ -596,14 +748,14 @@ static void nfs4_open_confirm_release(void *calldata)
 	if (data->cancelled == 0)
 		goto out_free;
 	/* In case of error, no cleanup! */
-	if (data->rpc_status != 0)
+	if (!data->rpc_done)
 		goto out_free;
 	nfs_confirm_seqid(&data->owner->so_seqid, 0);
 	state = nfs4_opendata_to_nfs4_state(data);
-	if (state != NULL)
-		nfs4_close_state(state, data->o_arg.open_flags);
+	if (!IS_ERR(state))
+		nfs4_close_state(&data->path, state, data->o_arg.open_flags);
 out_free:
-	nfs4_opendata_free(data);
+	nfs4_opendata_put(data);
 }
 
 static const struct rpc_call_ops nfs4_open_confirm_ops = {
@@ -621,12 +773,9 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
 	struct rpc_task *task;
 	int status;
 
-	atomic_inc(&data->count);
-	/*
-	 * If rpc_run_task() ends up calling ->rpc_release(), we
-	 * want to ensure that it takes the 'error' code path.
-	 */
-	data->rpc_status = -ENOMEM;
+	kref_get(&data->kref);
+	data->rpc_done = 0;
+	data->rpc_status = 0;
 	task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
@@ -653,13 +802,35 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
 	
 	if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0)
 		return;
+	/*
+	 * Check if we still need to send an OPEN call, or if we can use
+	 * a delegation instead.
+	 */
+	if (data->state != NULL) {
+		struct nfs_delegation *delegation;
+
+		if (can_open_cached(data->state, data->o_arg.open_flags & (FMODE_READ|FMODE_WRITE|O_EXCL)))
+			goto out_no_action;
+		rcu_read_lock();
+		delegation = rcu_dereference(NFS_I(data->state->inode)->delegation);
+		if (delegation != NULL &&
+		   (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0) {
+			rcu_read_unlock();
+			goto out_no_action;
+		}
+		rcu_read_unlock();
+	}
 	/* Update sequence id. */
-	data->o_arg.id = sp->so_id;
+	data->o_arg.id = sp->so_owner_id.id;
 	data->o_arg.clientid = sp->so_client->cl_clientid;
 	if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS)
 		msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
 	data->timestamp = jiffies;
 	rpc_call_setup(task, &msg, 0);
+	return;
+out_no_action:
+	task->tk_action = NULL;
+
 }
 
 static void nfs4_open_done(struct rpc_task *task, void *calldata)
@@ -683,8 +854,11 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata)
 				data->rpc_status = -ENOTDIR;
 		}
 		renew_lease(data->o_res.server, data->timestamp);
+		if (!(data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM))
+			nfs_confirm_seqid(&data->owner->so_seqid, 0);
 	}
 	nfs_increment_open_seqid(data->rpc_status, data->o_arg.seqid);
+	data->rpc_done = 1;
 }
 
 static void nfs4_open_release(void *calldata)
@@ -696,17 +870,17 @@ static void nfs4_open_release(void *calldata)
 	if (data->cancelled == 0)
 		goto out_free;
 	/* In case of error, no cleanup! */
-	if (data->rpc_status != 0)
+	if (data->rpc_status != 0 || !data->rpc_done)
 		goto out_free;
 	/* In case we need an open_confirm, no cleanup! */
 	if (data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM)
 		goto out_free;
 	nfs_confirm_seqid(&data->owner->so_seqid, 0);
 	state = nfs4_opendata_to_nfs4_state(data);
-	if (state != NULL)
-		nfs4_close_state(state, data->o_arg.open_flags);
+	if (!IS_ERR(state))
+		nfs4_close_state(&data->path, state, data->o_arg.open_flags);
 out_free:
-	nfs4_opendata_free(data);
+	nfs4_opendata_put(data);
 }
 
 static const struct rpc_call_ops nfs4_open_ops = {
@@ -727,12 +901,10 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 	struct rpc_task *task;
 	int status;
 
-	atomic_inc(&data->count);
-	/*
-	 * If rpc_run_task() ends up calling ->rpc_release(), we
-	 * want to ensure that it takes the 'error' code path.
-	 */
-	data->rpc_status = -ENOMEM;
+	kref_get(&data->kref);
+	data->rpc_done = 0;
+	data->rpc_status = 0;
+	data->cancelled = 0;
 	task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
@@ -743,7 +915,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 	} else
 		status = data->rpc_status;
 	rpc_put_task(task);
-	if (status != 0)
+	if (status != 0 || !data->rpc_done)
 		return status;
 
 	if (o_arg->open_flags & O_CREAT) {
@@ -756,7 +928,6 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 		if (status != 0)
 			return status;
 	}
-	nfs_confirm_seqid(&data->owner->so_seqid, 0);
 	if (!(o_res->f_attr->valid & NFS_ATTR_FATTR))
 		return server->nfs_client->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr);
 	return 0;
@@ -772,6 +943,8 @@ static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openf
 		mask |= MAY_READ;
 	if (openflags & FMODE_WRITE)
 		mask |= MAY_WRITE;
+	if (openflags & FMODE_EXEC)
+		mask |= MAY_EXEC;
 	status = nfs_access_get_cached(inode, cred, &cache);
 	if (status == 0)
 		goto out;
@@ -811,43 +984,32 @@ static int nfs4_recover_expired_lease(struct nfs_server *server)
  * 	reclaim state on the server after a network partition.
  * 	Assumes caller holds the appropriate lock
  */
-static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
+static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state)
 {
-	struct inode *inode = state->inode;
-	struct nfs_delegation *delegation = NFS_I(inode)->delegation;
 	struct nfs4_opendata *opendata;
-	int openflags = state->state & (FMODE_READ|FMODE_WRITE);
 	int ret;
 
-	if (delegation != NULL && !(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) {
-		ret = _nfs4_do_access(inode, sp->so_cred, openflags);
-		if (ret < 0)
-			return ret;
-		memcpy(&state->stateid, &delegation->stateid, sizeof(state->stateid));
-		set_bit(NFS_DELEGATED_STATE, &state->flags);
-		return 0;
-	}
-	opendata = nfs4_opendata_alloc(dentry, sp, openflags, NULL);
+	opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, NULL);
 	if (opendata == NULL)
 		return -ENOMEM;
 	ret = nfs4_open_recover(opendata, state);
 	if (ret == -ESTALE) {
 		/* Invalidate the state owner so we don't ever use it again */
-		nfs4_drop_state_owner(sp);
-		d_drop(dentry);
+		nfs4_drop_state_owner(state->owner);
+		d_drop(ctx->path.dentry);
 	}
-	nfs4_opendata_free(opendata);
+	nfs4_opendata_put(opendata);
 	return ret;
 }
 
-static inline int nfs4_do_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
+static inline int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state)
 {
-	struct nfs_server *server = NFS_SERVER(dentry->d_inode);
+	struct nfs_server *server = NFS_SERVER(state->inode);
 	struct nfs4_exception exception = { };
 	int err;
 
 	do {
-		err = _nfs4_open_expired(sp, state, dentry);
+		err = _nfs4_open_expired(ctx, state);
 		if (err == -NFS4ERR_DELAY)
 			nfs4_handle_exception(server, err, &exception);
 	} while (exception.retry);
@@ -862,107 +1024,38 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta
 	ctx = nfs4_state_find_open_context(state);
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
-	ret = nfs4_do_open_expired(sp, state, ctx->dentry);
+	ret = nfs4_do_open_expired(ctx, state);
 	put_nfs_open_context(ctx);
 	return ret;
 }
 
 /*
- * Returns a referenced nfs4_state if there is an open delegation on the file
+ * on an EXCLUSIVE create, the server should send back a bitmask with FATTR4-*
+ * fields corresponding to attributes that were used to store the verifier.
+ * Make sure we clobber those fields in the later setattr call
  */
-static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred, struct nfs4_state **res)
-{
-	struct nfs_delegation *delegation;
-	struct nfs_server *server = NFS_SERVER(inode);
-	struct nfs_client *clp = server->nfs_client;
-	struct nfs_inode *nfsi = NFS_I(inode);
-	struct nfs4_state_owner *sp = NULL;
-	struct nfs4_state *state = NULL;
-	int open_flags = flags & (FMODE_READ|FMODE_WRITE);
-	int err;
-
-	err = -ENOMEM;
-	if (!(sp = nfs4_get_state_owner(server, cred))) {
-		dprintk("%s: nfs4_get_state_owner failed!\n", __FUNCTION__);
-		return err;
-	}
-	err = nfs4_recover_expired_lease(server);
-	if (err != 0)
-		goto out_put_state_owner;
-	/* Protect against reboot recovery - NOTE ORDER! */
-	down_read(&clp->cl_sem);
-	/* Protect against delegation recall */
-	down_read(&nfsi->rwsem);
-	delegation = NFS_I(inode)->delegation;
-	err = -ENOENT;
-	if (delegation == NULL || (delegation->type & open_flags) != open_flags)
-		goto out_err;
-	err = -ENOMEM;
-	state = nfs4_get_open_state(inode, sp);
-	if (state == NULL)
-		goto out_err;
-
-	err = -ENOENT;
-	if ((state->state & open_flags) == open_flags) {
-		spin_lock(&inode->i_lock);
-		update_open_stateflags(state, open_flags);
-		spin_unlock(&inode->i_lock);
-		goto out_ok;
-	} else if (state->state != 0)
-		goto out_put_open_state;
-
-	lock_kernel();
-	err = _nfs4_do_access(inode, cred, open_flags);
-	unlock_kernel();
-	if (err != 0)
-		goto out_put_open_state;
-	set_bit(NFS_DELEGATED_STATE, &state->flags);
-	update_open_stateid(state, &delegation->stateid, open_flags);
-out_ok:
-	nfs4_put_state_owner(sp);
-	up_read(&nfsi->rwsem);
-	up_read(&clp->cl_sem);
-	*res = state;
-	return 0;
-out_put_open_state:
-	nfs4_put_open_state(state);
-out_err:
-	up_read(&nfsi->rwsem);
-	up_read(&clp->cl_sem);
-	if (err != -EACCES)
-		nfs_inode_return_delegation(inode);
-out_put_state_owner:
-	nfs4_put_state_owner(sp);
-	return err;
-}
-
-static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred)
+static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct iattr *sattr)
 {
-	struct nfs4_exception exception = { };
-	struct nfs4_state *res = ERR_PTR(-EIO);
-	int err;
+	if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_ACCESS) &&
+	    !(sattr->ia_valid & ATTR_ATIME_SET))
+		sattr->ia_valid |= ATTR_ATIME;
 
-	do {
-		err = _nfs4_open_delegated(inode, flags, cred, &res);
-		if (err == 0)
-			break;
-		res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(inode),
-					err, &exception));
-	} while (exception.retry);
-	return res;
+	if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_MODIFY) &&
+	    !(sattr->ia_valid & ATTR_MTIME_SET))
+		sattr->ia_valid |= ATTR_MTIME;
 }
 
 /*
  * Returns a referenced nfs4_state
  */
-static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
+static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
 {
 	struct nfs4_state_owner  *sp;
 	struct nfs4_state     *state = NULL;
 	struct nfs_server       *server = NFS_SERVER(dir);
 	struct nfs_client *clp = server->nfs_client;
 	struct nfs4_opendata *opendata;
-	int                     status;
+	int status;
 
 	/* Protect against reboot recovery conflicts */
 	status = -ENOMEM;
@@ -973,29 +1066,35 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
 	status = nfs4_recover_expired_lease(server);
 	if (status != 0)
 		goto err_put_state_owner;
+	if (path->dentry->d_inode != NULL)
+		nfs4_return_incompatible_delegation(path->dentry->d_inode, flags & (FMODE_READ|FMODE_WRITE));
 	down_read(&clp->cl_sem);
 	status = -ENOMEM;
-	opendata = nfs4_opendata_alloc(dentry, sp, flags, sattr);
+	opendata = nfs4_opendata_alloc(path, sp, flags, sattr);
 	if (opendata == NULL)
 		goto err_release_rwsem;
 
+	if (path->dentry->d_inode != NULL)
+		opendata->state = nfs4_get_open_state(path->dentry->d_inode, sp);
+
 	status = _nfs4_proc_open(opendata);
 	if (status != 0)
-		goto err_opendata_free;
+		goto err_opendata_put;
+
+	if (opendata->o_arg.open_flags & O_EXCL)
+		nfs4_exclusive_attrset(opendata, sattr);
 
-	status = -ENOMEM;
 	state = nfs4_opendata_to_nfs4_state(opendata);
-	if (state == NULL)
-		goto err_opendata_free;
-	if (opendata->o_res.delegation_type != 0)
-		nfs_inode_set_delegation(state->inode, cred, &opendata->o_res);
-	nfs4_opendata_free(opendata);
+	status = PTR_ERR(state);
+	if (IS_ERR(state))
+		goto err_opendata_put;
+	nfs4_opendata_put(opendata);
 	nfs4_put_state_owner(sp);
 	up_read(&clp->cl_sem);
 	*res = state;
 	return 0;
-err_opendata_free:
-	nfs4_opendata_free(opendata);
+err_opendata_put:
+	nfs4_opendata_put(opendata);
 err_release_rwsem:
 	up_read(&clp->cl_sem);
 err_put_state_owner:
@@ -1006,14 +1105,14 @@ out_err:
 }
 
 
-static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred)
+static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred)
 {
 	struct nfs4_exception exception = { };
 	struct nfs4_state *res;
 	int status;
 
 	do {
-		status = _nfs4_do_open(dir, dentry, flags, sattr, cred, &res);
+		status = _nfs4_do_open(dir, path, flags, sattr, cred, &res);
 		if (status == 0)
 			break;
 		/* NOTE: BAD_SEQID means the server and client disagree about the
@@ -1028,7 +1127,9 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
 		 * the user though...
 		 */
 		if (status == -NFS4ERR_BAD_SEQID) {
-			printk(KERN_WARNING "NFS: v4 server returned a bad sequence-id error!\n");
+			printk(KERN_WARNING "NFS: v4 server %s "
+					" returned a bad sequence-id error!\n",
+					NFS_SERVER(dir)->nfs_client->cl_hostname);
 			exception.retry = 1;
 			continue;
 		}
@@ -1042,6 +1143,11 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
 			exception.retry = 1;
 			continue;
 		}
+		if (status == -EAGAIN) {
+			/* We must have found a delegation */
+			exception.retry = 1;
+			continue;
+		}
 		res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir),
 					status, &exception));
 	} while (exception.retry);
@@ -1101,6 +1207,7 @@ static int nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr,
 }
 
 struct nfs4_closedata {
+	struct path path;
 	struct inode *inode;
 	struct nfs4_state *state;
 	struct nfs_closeargs arg;
@@ -1117,6 +1224,8 @@ static void nfs4_free_closedata(void *data)
 	nfs4_put_open_state(calldata->state);
 	nfs_free_seqid(calldata->arg.seqid);
 	nfs4_put_state_owner(sp);
+	dput(calldata->path.dentry);
+	mntput(calldata->path.mnt);
 	kfree(calldata);
 }
 
@@ -1134,8 +1243,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
 	nfs_increment_open_seqid(task->tk_status, calldata->arg.seqid);
 	switch (task->tk_status) {
 		case 0:
-			memcpy(&state->stateid, &calldata->res.stateid,
-					sizeof(state->stateid));
+			nfs_set_open_stateid(state, &calldata->res.stateid, calldata->arg.open_flags);
 			renew_lease(server, calldata->timestamp);
 			break;
 		case -NFS4ERR_STALE_STATEID:
@@ -1160,26 +1268,30 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
 		.rpc_resp = &calldata->res,
 		.rpc_cred = state->owner->so_cred,
 	};
-	int mode = 0, old_mode;
+	int clear_rd, clear_wr, clear_rdwr;
+	int mode;
 
 	if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
 		return;
-	/* Recalculate the new open mode in case someone reopened the file
-	 * while we were waiting in line to be scheduled.
-	 */
+
+	mode = FMODE_READ|FMODE_WRITE;
+	clear_rd = clear_wr = clear_rdwr = 0;
 	spin_lock(&state->owner->so_lock);
-	spin_lock(&calldata->inode->i_lock);
-	mode = old_mode = state->state;
+	/* Calculate the change in open mode */
 	if (state->n_rdwr == 0) {
-		if (state->n_rdonly == 0)
+		if (state->n_rdonly == 0) {
 			mode &= ~FMODE_READ;
-		if (state->n_wronly == 0)
+			clear_rd |= test_and_clear_bit(NFS_O_RDONLY_STATE, &state->flags);
+			clear_rdwr |= test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags);
+		}
+		if (state->n_wronly == 0) {
 			mode &= ~FMODE_WRITE;
+			clear_wr |= test_and_clear_bit(NFS_O_WRONLY_STATE, &state->flags);
+			clear_rdwr |= test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags);
+		}
 	}
-	nfs4_state_set_mode_locked(state, mode);
-	spin_unlock(&calldata->inode->i_lock);
 	spin_unlock(&state->owner->so_lock);
-	if (mode == old_mode || test_bit(NFS_DELEGATED_STATE, &state->flags)) {
+	if (!clear_rd && !clear_wr && !clear_rdwr) {
 		/* Note: exit _without_ calling nfs4_close_done */
 		task->tk_action = NULL;
 		return;
@@ -1209,19 +1321,21 @@ static const struct rpc_call_ops nfs4_close_ops = {
  *
  * NOTE: Caller must be holding the sp->so_owner semaphore!
  */
-int nfs4_do_close(struct inode *inode, struct nfs4_state *state) 
+int nfs4_do_close(struct path *path, struct nfs4_state *state)
 {
-	struct nfs_server *server = NFS_SERVER(inode);
+	struct nfs_server *server = NFS_SERVER(state->inode);
 	struct nfs4_closedata *calldata;
+	struct nfs4_state_owner *sp = state->owner;
+	struct rpc_task *task;
 	int status = -ENOMEM;
 
 	calldata = kmalloc(sizeof(*calldata), GFP_KERNEL);
 	if (calldata == NULL)
 		goto out;
-	calldata->inode = inode;
+	calldata->inode = state->inode;
 	calldata->state = state;
-	calldata->arg.fh = NFS_FH(inode);
-	calldata->arg.stateid = &state->stateid;
+	calldata->arg.fh = NFS_FH(state->inode);
+	calldata->arg.stateid = &state->open_stateid;
 	/* Serialization for the sequence id */
 	calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid);
 	if (calldata->arg.seqid == NULL)
@@ -1229,36 +1343,55 @@ int nfs4_do_close(struct inode *inode, struct nfs4_state *state)
 	calldata->arg.bitmask = server->attr_bitmask;
 	calldata->res.fattr = &calldata->fattr;
 	calldata->res.server = server;
+	calldata->path.mnt = mntget(path->mnt);
+	calldata->path.dentry = dget(path->dentry);
 
-	status = nfs4_call_async(server->client, &nfs4_close_ops, calldata);
-	if (status == 0)
-		goto out;
-
-	nfs_free_seqid(calldata->arg.seqid);
+	task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_close_ops, calldata);
+	if (IS_ERR(task))
+		return PTR_ERR(task);
+	rpc_put_task(task);
+	return 0;
 out_free_calldata:
 	kfree(calldata);
 out:
+	nfs4_put_open_state(state);
+	nfs4_put_state_owner(sp);
 	return status;
 }
 
-static int nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state)
+static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state)
 {
 	struct file *filp;
+	int ret;
 
-	filp = lookup_instantiate_filp(nd, dentry, NULL);
+	/* If the open_intent is for execute, we have an extra check to make */
+	if (nd->intent.open.flags & FMODE_EXEC) {
+		ret = _nfs4_do_access(state->inode,
+				state->owner->so_cred,
+				nd->intent.open.flags);
+		if (ret < 0)
+			goto out_close;
+	}
+	filp = lookup_instantiate_filp(nd, path->dentry, NULL);
 	if (!IS_ERR(filp)) {
 		struct nfs_open_context *ctx;
 		ctx = (struct nfs_open_context *)filp->private_data;
 		ctx->state = state;
 		return 0;
 	}
-	nfs4_close_state(state, nd->intent.open.flags);
-	return PTR_ERR(filp);
+	ret = PTR_ERR(filp);
+out_close:
+	nfs4_close_state(path, state, nd->intent.open.flags);
+	return ret;
 }
 
 struct dentry *
 nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 {
+	struct path path = {
+		.mnt = nd->mnt,
+		.dentry = dentry,
+	};
 	struct iattr attr;
 	struct rpc_cred *cred;
 	struct nfs4_state *state;
@@ -1277,7 +1410,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 	cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
 	if (IS_ERR(cred))
 		return (struct dentry *)cred;
-	state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred);
+	state = nfs4_do_open(dir, &path, nd->intent.open.flags, &attr, cred);
 	put_rpccred(cred);
 	if (IS_ERR(state)) {
 		if (PTR_ERR(state) == -ENOENT)
@@ -1287,22 +1420,24 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 	res = d_add_unique(dentry, igrab(state->inode));
 	if (res != NULL)
 		dentry = res;
-	nfs4_intent_set_file(nd, dentry, state);
+	nfs4_intent_set_file(nd, &path, state);
 	return res;
 }
 
 int
 nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd)
 {
+	struct path path = {
+		.mnt = nd->mnt,
+		.dentry = dentry,
+	};
 	struct rpc_cred *cred;
 	struct nfs4_state *state;
 
 	cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
 	if (IS_ERR(cred))
 		return PTR_ERR(cred);
-	state = nfs4_open_delegated(dentry->d_inode, openflags, cred);
-	if (IS_ERR(state))
-		state = nfs4_do_open(dir, dentry, openflags, NULL, cred);
+	state = nfs4_do_open(dir, &path, openflags, NULL, cred);
 	put_rpccred(cred);
 	if (IS_ERR(state)) {
 		switch (PTR_ERR(state)) {
@@ -1318,10 +1453,10 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st
 		}
 	}
 	if (state->inode == dentry->d_inode) {
-		nfs4_intent_set_file(nd, dentry, state);
+		nfs4_intent_set_file(nd, &path, state);
 		return 1;
 	}
-	nfs4_close_state(state, openflags);
+	nfs4_close_state(&path, state, openflags);
 out_drop:
 	d_drop(dentry);
 	return 0;
@@ -1559,8 +1694,6 @@ static int _nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
 	dprintk("NFS call  lookupfh %s\n", name->name);
 	status = rpc_call_sync(server->client, &msg, 0);
 	dprintk("NFS reply lookupfh: %d\n", status);
-	if (status == -NFS4ERR_MOVED)
-		status = -EREMOTE;
 	return status;
 }
 
@@ -1571,10 +1704,13 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
 	struct nfs4_exception exception = { };
 	int err;
 	do {
-		err = nfs4_handle_exception(server,
-				_nfs4_proc_lookupfh(server, dirfh, name,
-						    fhandle, fattr),
-				&exception);
+		err = _nfs4_proc_lookupfh(server, dirfh, name, fhandle, fattr);
+		/* FIXME: !!!! */
+		if (err == -NFS4ERR_MOVED) {
+			err = -EREMOTE;
+			break;
+		}
+		err = nfs4_handle_exception(server, err, &exception);
 	} while (exception.retry);
 	return err;
 }
@@ -1582,28 +1718,10 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
 static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name,
 		struct nfs_fh *fhandle, struct nfs_fattr *fattr)
 {
-	int		       status;
-	struct nfs_server *server = NFS_SERVER(dir);
-	struct nfs4_lookup_arg args = {
-		.bitmask = server->attr_bitmask,
-		.dir_fh = NFS_FH(dir),
-		.name = name,
-	};
-	struct nfs4_lookup_res res = {
-		.server = server,
-		.fattr = fattr,
-		.fh = fhandle,
-	};
-	struct rpc_message msg = {
-		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP],
-		.rpc_argp = &args,
-		.rpc_resp = &res,
-	};
-	
-	nfs_fattr_init(fattr);
+	int status;
 	
 	dprintk("NFS call  lookup %s\n", name->name);
-	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	status = _nfs4_proc_lookupfh(NFS_SERVER(dir), NFS_FH(dir), name, fhandle, fattr);
 	if (status == -NFS4ERR_MOVED)
 		status = nfs4_get_referral(dir, name, fattr, fhandle);
 	dprintk("NFS reply lookup: %d\n", status);
@@ -1752,6 +1870,10 @@ static int
 nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
                  int flags, struct nameidata *nd)
 {
+	struct path path = {
+		.mnt = nd->mnt,
+		.dentry = dentry,
+	};
 	struct nfs4_state *state;
 	struct rpc_cred *cred;
 	int status = 0;
@@ -1761,7 +1883,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		status = PTR_ERR(cred);
 		goto out;
 	}
-	state = nfs4_do_open(dir, dentry, flags, sattr, cred);
+	state = nfs4_do_open(dir, &path, flags, sattr, cred);
 	put_rpccred(cred);
 	if (IS_ERR(state)) {
 		status = PTR_ERR(state);
@@ -1773,11 +1895,12 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		status = nfs4_do_setattr(state->inode, &fattr, sattr, state);
 		if (status == 0)
 			nfs_setattr_update_inode(state->inode, sattr);
+		nfs_post_op_update_inode(state->inode, &fattr);
 	}
-	if (status == 0 && nd != NULL && (nd->flags & LOOKUP_OPEN))
-		status = nfs4_intent_set_file(nd, dentry, state);
+	if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0)
+		status = nfs4_intent_set_file(nd, &path, state);
 	else
-		nfs4_close_state(state, flags);
+		nfs4_close_state(&path, state, flags);
 out:
 	return status;
 }
@@ -3008,7 +3131,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
 	if (status != 0)
 		goto out;
 	lsp = request->fl_u.nfs4_fl.owner;
-	arg.lock_owner.id = lsp->ls_id; 
+	arg.lock_owner.id = lsp->ls_id.id;
 	status = rpc_call_sync(server->client, &msg, 0);
 	switch (status) {
 		case 0:
@@ -3152,6 +3275,11 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
 {
 	struct nfs4_unlockdata *data;
 
+	/* Ensure this is an unlock - when canceling a lock, the
+	 * canceled lock is passed in, and it won't be an unlock.
+	 */
+	fl->fl_type = F_UNLCK;
+
 	data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid);
 	if (data == NULL) {
 		nfs_free_seqid(seqid);
@@ -3222,7 +3350,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
 		goto out_free;
 	p->arg.lock_stateid = &lsp->ls_stateid;
 	p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
-	p->arg.lock_owner.id = lsp->ls_id;
+	p->arg.lock_owner.id = lsp->ls_id.id;
 	p->lsp = lsp;
 	atomic_inc(&lsp->ls_count);
 	p->ctx = get_nfs_open_context(ctx);
@@ -3285,7 +3413,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
 		memcpy(data->lsp->ls_stateid.data, data->res.stateid.data,
 					sizeof(data->lsp->ls_stateid.data));
 		data->lsp->ls_flags |= NFS_LOCK_INITIALIZED;
-		renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp);
+		renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp);
 	}
 	nfs_increment_lock_seqid(data->rpc_status, data->arg.lock_seqid);
 out:
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 8ed79d5c54f9..e9662ba81d86 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -38,12 +38,14 @@
  * subsequent patch.
  */
 
+#include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/smp_lock.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_idmap.h>
 #include <linux/kthread.h>
 #include <linux/module.h>
+#include <linux/random.h>
 #include <linux/workqueue.h>
 #include <linux/bitops.h>
 
@@ -69,33 +71,14 @@ static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred)
 	return status;
 }
 
-u32
-nfs4_alloc_lockowner_id(struct nfs_client *clp)
-{
-	return clp->cl_lockowner_id ++;
-}
-
-static struct nfs4_state_owner *
-nfs4_client_grab_unused(struct nfs_client *clp, struct rpc_cred *cred)
-{
-	struct nfs4_state_owner *sp = NULL;
-
-	if (!list_empty(&clp->cl_unused)) {
-		sp = list_entry(clp->cl_unused.next, struct nfs4_state_owner, so_list);
-		atomic_inc(&sp->so_count);
-		sp->so_cred = cred;
-		list_move(&sp->so_list, &clp->cl_state_owners);
-		clp->cl_nunused--;
-	}
-	return sp;
-}
-
 struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp)
 {
 	struct nfs4_state_owner *sp;
+	struct rb_node *pos;
 	struct rpc_cred *cred = NULL;
 
-	list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
+	for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
+		sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
 		if (list_empty(&sp->so_states))
 			continue;
 		cred = get_rpccred(sp->so_cred);
@@ -107,32 +90,146 @@ struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp)
 static struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
 {
 	struct nfs4_state_owner *sp;
+	struct rb_node *pos;
 
-	if (!list_empty(&clp->cl_state_owners)) {
-		sp = list_entry(clp->cl_state_owners.next,
-				struct nfs4_state_owner, so_list);
+	pos = rb_first(&clp->cl_state_owners);
+	if (pos != NULL) {
+		sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
 		return get_rpccred(sp->so_cred);
 	}
 	return NULL;
 }
 
+static void nfs_alloc_unique_id(struct rb_root *root, struct nfs_unique_id *new,
+		__u64 minval, int maxbits)
+{
+	struct rb_node **p, *parent;
+	struct nfs_unique_id *pos;
+	__u64 mask = ~0ULL;
+
+	if (maxbits < 64)
+		mask = (1ULL << maxbits) - 1ULL;
+
+	/* Ensure distribution is more or less flat */
+	get_random_bytes(&new->id, sizeof(new->id));
+	new->id &= mask;
+	if (new->id < minval)
+		new->id += minval;
+retry:
+	p = &root->rb_node;
+	parent = NULL;
+
+	while (*p != NULL) {
+		parent = *p;
+		pos = rb_entry(parent, struct nfs_unique_id, rb_node);
+
+		if (new->id < pos->id)
+			p = &(*p)->rb_left;
+		else if (new->id > pos->id)
+			p = &(*p)->rb_right;
+		else
+			goto id_exists;
+	}
+	rb_link_node(&new->rb_node, parent, p);
+	rb_insert_color(&new->rb_node, root);
+	return;
+id_exists:
+	for (;;) {
+		new->id++;
+		if (new->id < minval || (new->id & mask) != new->id) {
+			new->id = minval;
+			break;
+		}
+		parent = rb_next(parent);
+		if (parent == NULL)
+			break;
+		pos = rb_entry(parent, struct nfs_unique_id, rb_node);
+		if (new->id < pos->id)
+			break;
+	}
+	goto retry;
+}
+
+static void nfs_free_unique_id(struct rb_root *root, struct nfs_unique_id *id)
+{
+	rb_erase(&id->rb_node, root);
+}
+
 static struct nfs4_state_owner *
-nfs4_find_state_owner(struct nfs_client *clp, struct rpc_cred *cred)
+nfs4_find_state_owner(struct nfs_server *server, struct rpc_cred *cred)
 {
+	struct nfs_client *clp = server->nfs_client;
+	struct rb_node **p = &clp->cl_state_owners.rb_node,
+		       *parent = NULL;
 	struct nfs4_state_owner *sp, *res = NULL;
 
-	list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
-		if (sp->so_cred != cred)
+	while (*p != NULL) {
+		parent = *p;
+		sp = rb_entry(parent, struct nfs4_state_owner, so_client_node);
+
+		if (server < sp->so_server) {
+			p = &parent->rb_left;
 			continue;
-		atomic_inc(&sp->so_count);
-		/* Move to the head of the list */
-		list_move(&sp->so_list, &clp->cl_state_owners);
-		res = sp;
-		break;
+		}
+		if (server > sp->so_server) {
+			p = &parent->rb_right;
+			continue;
+		}
+		if (cred < sp->so_cred)
+			p = &parent->rb_left;
+		else if (cred > sp->so_cred)
+			p = &parent->rb_right;
+		else {
+			atomic_inc(&sp->so_count);
+			res = sp;
+			break;
+		}
 	}
 	return res;
 }
 
+static struct nfs4_state_owner *
+nfs4_insert_state_owner(struct nfs_client *clp, struct nfs4_state_owner *new)
+{
+	struct rb_node **p = &clp->cl_state_owners.rb_node,
+		       *parent = NULL;
+	struct nfs4_state_owner *sp;
+
+	while (*p != NULL) {
+		parent = *p;
+		sp = rb_entry(parent, struct nfs4_state_owner, so_client_node);
+
+		if (new->so_server < sp->so_server) {
+			p = &parent->rb_left;
+			continue;
+		}
+		if (new->so_server > sp->so_server) {
+			p = &parent->rb_right;
+			continue;
+		}
+		if (new->so_cred < sp->so_cred)
+			p = &parent->rb_left;
+		else if (new->so_cred > sp->so_cred)
+			p = &parent->rb_right;
+		else {
+			atomic_inc(&sp->so_count);
+			return sp;
+		}
+	}
+	nfs_alloc_unique_id(&clp->cl_openowner_id, &new->so_owner_id, 1, 64);
+	rb_link_node(&new->so_client_node, parent, p);
+	rb_insert_color(&new->so_client_node, &clp->cl_state_owners);
+	return new;
+}
+
+static void
+nfs4_remove_state_owner(struct nfs_client *clp, struct nfs4_state_owner *sp)
+{
+	if (!RB_EMPTY_NODE(&sp->so_client_node))
+		rb_erase(&sp->so_client_node, &clp->cl_state_owners);
+	nfs_free_unique_id(&clp->cl_openowner_id, &sp->so_owner_id);
+}
+
 /*
  * nfs4_alloc_state_owner(): this is called on the OPEN or CREATE path to
  * create a new state_owner.
@@ -160,10 +257,14 @@ nfs4_alloc_state_owner(void)
 void
 nfs4_drop_state_owner(struct nfs4_state_owner *sp)
 {
-	struct nfs_client *clp = sp->so_client;
-	spin_lock(&clp->cl_lock);
-	list_del_init(&sp->so_list);
-	spin_unlock(&clp->cl_lock);
+	if (!RB_EMPTY_NODE(&sp->so_client_node)) {
+		struct nfs_client *clp = sp->so_client;
+
+		spin_lock(&clp->cl_lock);
+		rb_erase(&sp->so_client_node, &clp->cl_state_owners);
+		RB_CLEAR_NODE(&sp->so_client_node);
+		spin_unlock(&clp->cl_lock);
+	}
 }
 
 /*
@@ -175,26 +276,25 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
 	struct nfs_client *clp = server->nfs_client;
 	struct nfs4_state_owner *sp, *new;
 
-	get_rpccred(cred);
-	new = nfs4_alloc_state_owner();
 	spin_lock(&clp->cl_lock);
-	sp = nfs4_find_state_owner(clp, cred);
-	if (sp == NULL)
-		sp = nfs4_client_grab_unused(clp, cred);
-	if (sp == NULL && new != NULL) {
-		list_add(&new->so_list, &clp->cl_state_owners);
-		new->so_client = clp;
-		new->so_id = nfs4_alloc_lockowner_id(clp);
-		new->so_cred = cred;
-		sp = new;
-		new = NULL;
-	}
+	sp = nfs4_find_state_owner(server, cred);
 	spin_unlock(&clp->cl_lock);
-	kfree(new);
 	if (sp != NULL)
 		return sp;
-	put_rpccred(cred);
-	return NULL;
+	new = nfs4_alloc_state_owner();
+	if (new == NULL)
+		return NULL;
+	new->so_client = clp;
+	new->so_server = server;
+	new->so_cred = cred;
+	spin_lock(&clp->cl_lock);
+	sp = nfs4_insert_state_owner(clp, new);
+	spin_unlock(&clp->cl_lock);
+	if (sp == new)
+		get_rpccred(cred);
+	else
+		kfree(new);
+	return sp;
 }
 
 /*
@@ -208,18 +308,7 @@ void nfs4_put_state_owner(struct nfs4_state_owner *sp)
 
 	if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
 		return;
-	if (clp->cl_nunused >= OPENOWNER_POOL_SIZE)
-		goto out_free;
-	if (list_empty(&sp->so_list))
-		goto out_free;
-	list_move(&sp->so_list, &clp->cl_unused);
-	clp->cl_nunused++;
-	spin_unlock(&clp->cl_lock);
-	put_rpccred(cred);
-	cred = NULL;
-	return;
-out_free:
-	list_del(&sp->so_list);
+	nfs4_remove_state_owner(clp, sp);
 	spin_unlock(&clp->cl_lock);
 	put_rpccred(cred);
 	kfree(sp);
@@ -236,6 +325,7 @@ nfs4_alloc_open_state(void)
 	atomic_set(&state->count, 1);
 	INIT_LIST_HEAD(&state->lock_states);
 	spin_lock_init(&state->state_lock);
+	seqlock_init(&state->seqlock);
 	return state;
 }
 
@@ -263,13 +353,10 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner)
 	struct nfs4_state *state;
 
 	list_for_each_entry(state, &nfsi->open_states, inode_states) {
-		/* Is this in the process of being freed? */
-		if (state->state == 0)
+		if (state->owner != owner)
 			continue;
-		if (state->owner == owner) {
-			atomic_inc(&state->count);
+		if (atomic_inc_not_zero(&state->count))
 			return state;
-		}
 	}
 	return NULL;
 }
@@ -341,16 +428,15 @@ void nfs4_put_open_state(struct nfs4_state *state)
 /*
  * Close the current file.
  */
-void nfs4_close_state(struct nfs4_state *state, mode_t mode)
+void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode)
 {
-	struct inode *inode = state->inode;
 	struct nfs4_state_owner *owner = state->owner;
-	int oldstate, newstate = 0;
+	int call_close = 0;
+	int newstate;
 
 	atomic_inc(&owner->so_count);
 	/* Protect against nfs4_find_state() */
 	spin_lock(&owner->so_lock);
-	spin_lock(&inode->i_lock);
 	switch (mode & (FMODE_READ | FMODE_WRITE)) {
 		case FMODE_READ:
 			state->n_rdonly--;
@@ -361,24 +447,29 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode)
 		case FMODE_READ|FMODE_WRITE:
 			state->n_rdwr--;
 	}
-	oldstate = newstate = state->state;
+	newstate = FMODE_READ|FMODE_WRITE;
 	if (state->n_rdwr == 0) {
-		if (state->n_rdonly == 0)
+		if (state->n_rdonly == 0) {
 			newstate &= ~FMODE_READ;
-		if (state->n_wronly == 0)
+			call_close |= test_bit(NFS_O_RDONLY_STATE, &state->flags);
+			call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
+		}
+		if (state->n_wronly == 0) {
 			newstate &= ~FMODE_WRITE;
+			call_close |= test_bit(NFS_O_WRONLY_STATE, &state->flags);
+			call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
+		}
+		if (newstate == 0)
+			clear_bit(NFS_DELEGATED_STATE, &state->flags);
 	}
-	if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
-		nfs4_state_set_mode_locked(state, newstate);
-		oldstate = newstate;
-	}
-	spin_unlock(&inode->i_lock);
+	nfs4_state_set_mode_locked(state, newstate);
 	spin_unlock(&owner->so_lock);
 
-	if (oldstate != newstate && nfs4_do_close(inode, state) == 0)
-		return;
-	nfs4_put_open_state(state);
-	nfs4_put_state_owner(owner);
+	if (!call_close) {
+		nfs4_put_open_state(state);
+		nfs4_put_state_owner(owner);
+	} else
+		nfs4_do_close(path, state);
 }
 
 /*
@@ -415,12 +506,22 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
 	atomic_set(&lsp->ls_count, 1);
 	lsp->ls_owner = fl_owner;
 	spin_lock(&clp->cl_lock);
-	lsp->ls_id = nfs4_alloc_lockowner_id(clp);
+	nfs_alloc_unique_id(&clp->cl_lockowner_id, &lsp->ls_id, 1, 64);
 	spin_unlock(&clp->cl_lock);
 	INIT_LIST_HEAD(&lsp->ls_locks);
 	return lsp;
 }
 
+static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
+{
+	struct nfs_client *clp = lsp->ls_state->owner->so_client;
+
+	spin_lock(&clp->cl_lock);
+	nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id);
+	spin_unlock(&clp->cl_lock);
+	kfree(lsp);
+}
+
 /*
  * Return a compatible lock_state. If no initialized lock_state structure
  * exists, return an uninitialized one.
@@ -450,7 +551,8 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_
 			return NULL;
 	}
 	spin_unlock(&state->state_lock);
-	kfree(new);
+	if (new != NULL)
+		nfs4_free_lock_state(new);
 	return lsp;
 }
 
@@ -471,7 +573,7 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
 	if (list_empty(&state->lock_states))
 		clear_bit(LK_STATE_IN_USE, &state->flags);
 	spin_unlock(&state->state_lock);
-	kfree(lsp);
+	nfs4_free_lock_state(lsp);
 }
 
 static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
@@ -513,8 +615,12 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
 void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner)
 {
 	struct nfs4_lock_state *lsp;
+	int seq;
 
-	memcpy(dst, &state->stateid, sizeof(*dst));
+	do {
+		seq = read_seqbegin(&state->seqlock);
+		memcpy(dst, &state->stateid, sizeof(*dst));
+	} while (read_seqretry(&state->seqlock, seq));
 	if (test_bit(LK_STATE_IN_USE, &state->flags) == 0)
 		return;
 
@@ -557,12 +663,18 @@ void nfs_free_seqid(struct nfs_seqid *seqid)
  * failed with a seqid incrementing error -
  * see comments nfs_fs.h:seqid_mutating_error()
  */
-static inline void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
+static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
 {
 	switch (status) {
 		case 0:
 			break;
 		case -NFS4ERR_BAD_SEQID:
+			if (seqid->sequence->flags & NFS_SEQID_CONFIRMED)
+				return;
+			printk(KERN_WARNING "NFS: v4 server returned a bad"
+					"sequence-id error on an"
+					"unconfirmed sequence %p!\n",
+					seqid->sequence);
 		case -NFS4ERR_STALE_CLIENTID:
 		case -NFS4ERR_STALE_STATEID:
 		case -NFS4ERR_BAD_STATEID:
@@ -586,7 +698,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
 				struct nfs4_state_owner, so_seqid);
 		nfs4_drop_state_owner(sp);
 	}
-	return nfs_increment_seqid(status, seqid);
+	nfs_increment_seqid(status, seqid);
 }
 
 /*
@@ -596,7 +708,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
  */
 void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid)
 {
-	return nfs_increment_seqid(status, seqid);
+	nfs_increment_seqid(status, seqid);
 }
 
 int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task)
@@ -748,15 +860,21 @@ out_err:
 static void nfs4_state_mark_reclaim(struct nfs_client *clp)
 {
 	struct nfs4_state_owner *sp;
+	struct rb_node *pos;
 	struct nfs4_state *state;
 	struct nfs4_lock_state *lock;
 
 	/* Reset all sequence ids to zero */
-	list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
+	for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
+		sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
 		sp->so_seqid.counter = 0;
 		sp->so_seqid.flags = 0;
 		spin_lock(&sp->so_lock);
 		list_for_each_entry(state, &sp->so_states, open_states) {
+			clear_bit(NFS_DELEGATED_STATE, &state->flags);
+			clear_bit(NFS_O_RDONLY_STATE, &state->flags);
+			clear_bit(NFS_O_WRONLY_STATE, &state->flags);
+			clear_bit(NFS_O_RDWR_STATE, &state->flags);
 			list_for_each_entry(lock, &state->lock_states, ls_locks) {
 				lock->ls_seqid.counter = 0;
 				lock->ls_seqid.flags = 0;
@@ -771,6 +889,7 @@ static int reclaimer(void *ptr)
 {
 	struct nfs_client *clp = ptr;
 	struct nfs4_state_owner *sp;
+	struct rb_node *pos;
 	struct nfs4_state_recovery_ops *ops;
 	struct rpc_cred *cred;
 	int status = 0;
@@ -816,7 +935,8 @@ restart_loop:
 	/* Mark all delegations for reclaim */
 	nfs_delegation_mark_reclaim(clp);
 	/* Note: list is protected by exclusive lock on cl->cl_sem */
-	list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
+	for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
+		sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
 		status = nfs4_reclaim_open_state(ops, sp);
 		if (status < 0) {
 			if (status == -NFS4ERR_NO_GRACE) {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 8003c91ccb9a..c08738441f73 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -68,9 +68,10 @@ static int nfs4_stat_to_errno(int);
 #endif
 
 /* lock,open owner id: 
- * we currently use size 1 (u32) out of (NFS4_OPAQUE_LIMIT  >> 2)
+ * we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT  >> 2)
  */
-#define owner_id_maxsz          (1 + 1)
+#define open_owner_id_maxsz	(1 + 4)
+#define lock_owner_id_maxsz	(1 + 4)
 #define compound_encode_hdr_maxsz	(3 + (NFS4_MAXTAGLEN >> 2))
 #define compound_decode_hdr_maxsz	(3 + (NFS4_MAXTAGLEN >> 2))
 #define op_encode_hdr_maxsz	(1)
@@ -87,9 +88,11 @@ static int nfs4_stat_to_errno(int);
 #define encode_getattr_maxsz    (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz)
 #define nfs4_name_maxsz		(1 + ((3 + NFS4_MAXNAMLEN) >> 2))
 #define nfs4_path_maxsz		(1 + ((3 + NFS4_MAXPATHLEN) >> 2))
+#define nfs4_owner_maxsz	(1 + XDR_QUADLEN(IDMAP_NAMESZ))
+#define nfs4_group_maxsz	(1 + XDR_QUADLEN(IDMAP_NAMESZ))
 /* This is based on getfattr, which uses the most attributes: */
 #define nfs4_fattr_value_maxsz	(1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \
-				3 + 3 + 3 + 2 * nfs4_name_maxsz))
+				3 + 3 + 3 + nfs4_owner_maxsz + nfs4_group_maxsz))
 #define nfs4_fattr_maxsz	(nfs4_fattr_bitmap_maxsz + \
 				nfs4_fattr_value_maxsz)
 #define decode_getattr_maxsz    (op_decode_hdr_maxsz + nfs4_fattr_maxsz)
@@ -116,8 +119,27 @@ static int nfs4_stat_to_errno(int);
 				3 + (NFS4_VERIFIER_SIZE >> 2))
 #define decode_setclientid_confirm_maxsz \
 				(op_decode_hdr_maxsz)
-#define encode_lookup_maxsz	(op_encode_hdr_maxsz + \
-				1 + ((3 + NFS4_FHSIZE) >> 2))
+#define encode_lookup_maxsz	(op_encode_hdr_maxsz + nfs4_name_maxsz)
+#define decode_lookup_maxsz	(op_decode_hdr_maxsz)
+#define encode_share_access_maxsz \
+				(2)
+#define encode_createmode_maxsz	(1 + nfs4_fattr_maxsz)
+#define encode_opentype_maxsz	(1 + encode_createmode_maxsz)
+#define encode_claim_null_maxsz	(1 + nfs4_name_maxsz)
+#define encode_open_maxsz	(op_encode_hdr_maxsz + \
+				2 + encode_share_access_maxsz + 2 + \
+				open_owner_id_maxsz + \
+				encode_opentype_maxsz + \
+				encode_claim_null_maxsz)
+#define decode_ace_maxsz	(3 + nfs4_owner_maxsz)
+#define decode_delegation_maxsz	(1 + XDR_QUADLEN(NFS4_STATEID_SIZE) + 1 + \
+				decode_ace_maxsz)
+#define decode_change_info_maxsz	(5)
+#define decode_open_maxsz	(op_decode_hdr_maxsz + \
+				XDR_QUADLEN(NFS4_STATEID_SIZE) + \
+				decode_change_info_maxsz + 1 + \
+				nfs4_fattr_bitmap_maxsz + \
+				decode_delegation_maxsz)
 #define encode_remove_maxsz	(op_encode_hdr_maxsz + \
 				nfs4_name_maxsz)
 #define encode_rename_maxsz	(op_encode_hdr_maxsz + \
@@ -134,9 +156,15 @@ static int nfs4_stat_to_errno(int);
 #define encode_create_maxsz	(op_encode_hdr_maxsz + \
 				2 + nfs4_name_maxsz + \
 				nfs4_fattr_maxsz)
-#define decode_create_maxsz	(op_decode_hdr_maxsz + 8)
+#define decode_create_maxsz	(op_decode_hdr_maxsz + \
+				decode_change_info_maxsz + \
+				nfs4_fattr_bitmap_maxsz)
 #define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4)
 #define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
+#define encode_fs_locations_maxsz \
+				(encode_getattr_maxsz)
+#define decode_fs_locations_maxsz \
+				(0)
 #define NFS4_enc_compound_sz	(1024)  /* XXX: large enough? */
 #define NFS4_dec_compound_sz	(1024)  /* XXX: large enough? */
 #define NFS4_enc_read_sz	(compound_encode_hdr_maxsz + \
@@ -174,16 +202,21 @@ static int nfs4_stat_to_errno(int);
 				op_decode_hdr_maxsz + 2 + \
 				decode_getattr_maxsz)
 #define NFS4_enc_open_sz        (compound_encode_hdr_maxsz + \
-                                encode_putfh_maxsz + \
-                                op_encode_hdr_maxsz + \
-                                13 + 3 + 2 + 64 + \
-                                encode_getattr_maxsz + \
-                                encode_getfh_maxsz)
+				encode_putfh_maxsz + \
+				encode_savefh_maxsz + \
+				encode_open_maxsz + \
+				encode_getfh_maxsz + \
+				encode_getattr_maxsz + \
+				encode_restorefh_maxsz + \
+				encode_getattr_maxsz)
 #define NFS4_dec_open_sz        (compound_decode_hdr_maxsz + \
-                                decode_putfh_maxsz + \
-                                op_decode_hdr_maxsz + 4 + 5 + 2 + 3 + \
-                                decode_getattr_maxsz + \
-                                decode_getfh_maxsz)
+				decode_putfh_maxsz + \
+				decode_savefh_maxsz + \
+				decode_open_maxsz + \
+				decode_getfh_maxsz + \
+				decode_getattr_maxsz + \
+				decode_restorefh_maxsz + \
+				decode_getattr_maxsz)
 #define NFS4_enc_open_confirm_sz      \
                                 (compound_encode_hdr_maxsz + \
                                 encode_putfh_maxsz + \
@@ -193,12 +226,12 @@ static int nfs4_stat_to_errno(int);
                                         op_decode_hdr_maxsz + 4)
 #define NFS4_enc_open_noattr_sz	(compound_encode_hdr_maxsz + \
 					encode_putfh_maxsz + \
-					op_encode_hdr_maxsz + \
-					11)
+					encode_open_maxsz + \
+					encode_getattr_maxsz)
 #define NFS4_dec_open_noattr_sz	(compound_decode_hdr_maxsz + \
 					decode_putfh_maxsz + \
-					op_decode_hdr_maxsz + \
-					4 + 5 + 2 + 3)
+					decode_open_maxsz + \
+					decode_getattr_maxsz)
 #define NFS4_enc_open_downgrade_sz \
 				(compound_encode_hdr_maxsz + \
                                 encode_putfh_maxsz + \
@@ -256,19 +289,19 @@ static int nfs4_stat_to_errno(int);
 				op_encode_hdr_maxsz + \
 				1 + 1 + 2 + 2 + \
 				1 + 4 + 1 + 2 + \
-				owner_id_maxsz)
+				lock_owner_id_maxsz)
 #define NFS4_dec_lock_sz        (compound_decode_hdr_maxsz + \
 				decode_putfh_maxsz + \
 				decode_getattr_maxsz + \
 				op_decode_hdr_maxsz + \
 				2 + 2 + 1 + 2 + \
-				owner_id_maxsz)
+				lock_owner_id_maxsz)
 #define NFS4_enc_lockt_sz       (compound_encode_hdr_maxsz + \
 				encode_putfh_maxsz + \
 				encode_getattr_maxsz + \
 				op_encode_hdr_maxsz + \
 				1 + 2 + 2 + 2 + \
-				owner_id_maxsz)
+				lock_owner_id_maxsz)
 #define NFS4_dec_lockt_sz       (NFS4_dec_lock_sz)
 #define NFS4_enc_locku_sz       (compound_encode_hdr_maxsz + \
 				encode_putfh_maxsz + \
@@ -298,7 +331,7 @@ static int nfs4_stat_to_errno(int);
 				encode_getfh_maxsz)
 #define NFS4_dec_lookup_sz	(compound_decode_hdr_maxsz + \
 				decode_putfh_maxsz + \
-				op_decode_hdr_maxsz + \
+				decode_lookup_maxsz + \
 				decode_getattr_maxsz + \
 				decode_getfh_maxsz)
 #define NFS4_enc_lookup_root_sz (compound_encode_hdr_maxsz + \
@@ -417,12 +450,13 @@ static int nfs4_stat_to_errno(int);
 #define NFS4_enc_fs_locations_sz \
 				(compound_encode_hdr_maxsz + \
 				 encode_putfh_maxsz + \
-				 encode_getattr_maxsz)
+				 encode_lookup_maxsz + \
+				 encode_fs_locations_maxsz)
 #define NFS4_dec_fs_locations_sz \
 				(compound_decode_hdr_maxsz + \
 				 decode_putfh_maxsz + \
-				 op_decode_hdr_maxsz + \
-				 nfs4_fattr_bitmap_maxsz)
+				 decode_lookup_maxsz + \
+				 decode_fs_locations_maxsz)
 
 static struct {
 	unsigned int	mode;
@@ -793,13 +827,14 @@ static int encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args)
 	WRITE64(nfs4_lock_length(args->fl));
 	WRITE32(args->new_lock_owner);
 	if (args->new_lock_owner){
-		RESERVE_SPACE(4+NFS4_STATEID_SIZE+20);
+		RESERVE_SPACE(4+NFS4_STATEID_SIZE+32);
 		WRITE32(args->open_seqid->sequence->counter);
 		WRITEMEM(args->open_stateid->data, NFS4_STATEID_SIZE);
 		WRITE32(args->lock_seqid->sequence->counter);
 		WRITE64(args->lock_owner.clientid);
-		WRITE32(4);
-		WRITE32(args->lock_owner.id);
+		WRITE32(16);
+		WRITEMEM("lock id:", 8);
+		WRITE64(args->lock_owner.id);
 	}
 	else {
 		RESERVE_SPACE(NFS4_STATEID_SIZE+4);
@@ -814,14 +849,15 @@ static int encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *arg
 {
 	__be32 *p;
 
-	RESERVE_SPACE(40);
+	RESERVE_SPACE(52);
 	WRITE32(OP_LOCKT);
 	WRITE32(nfs4_lock_type(args->fl, 0));
 	WRITE64(args->fl->fl_start);
 	WRITE64(nfs4_lock_length(args->fl));
 	WRITE64(args->lock_owner.clientid);
-	WRITE32(4);
-	WRITE32(args->lock_owner.id);
+	WRITE32(16);
+	WRITEMEM("lock id:", 8);
+	WRITE64(args->lock_owner.id);
 
 	return 0;
 }
@@ -886,10 +922,11 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
 	WRITE32(OP_OPEN);
 	WRITE32(arg->seqid->sequence->counter);
 	encode_share_access(xdr, arg->open_flags);
-	RESERVE_SPACE(16);
+	RESERVE_SPACE(28);
 	WRITE64(arg->clientid);
-	WRITE32(4);
-	WRITE32(arg->id);
+	WRITE32(16);
+	WRITEMEM("open id:", 8);
+	WRITE64(arg->id);
 }
 
 static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg)
@@ -1071,7 +1108,7 @@ static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args)
 
 static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req)
 {
-	struct rpc_auth *auth = req->rq_task->tk_auth;
+	struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
 	uint32_t attrs[2] = {
 		FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID,
 		FATTR4_WORD1_MOUNTED_ON_FILEID,
@@ -1117,7 +1154,7 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
 
 static int encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req)
 {
-	struct rpc_auth *auth = req->rq_task->tk_auth;
+	struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
 	unsigned int replen;
 	__be32 *p;
 
@@ -1735,7 +1772,7 @@ out:
  */
 static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
 {
-	struct rpc_auth	*auth = req->rq_task->tk_auth;
+	struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.nops = 2,
@@ -1795,7 +1832,7 @@ nfs4_xdr_enc_getacl(struct rpc_rqst *req, __be32 *p,
 		struct nfs_getaclargs *args)
 {
 	struct xdr_stream xdr;
-	struct rpc_auth *auth = req->rq_task->tk_auth;
+	struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
 	struct compound_hdr hdr = {
 		.nops   = 2,
 	};
@@ -2030,7 +2067,7 @@ static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs
 	struct compound_hdr hdr = {
 		.nops = 3,
 	};
-	struct rpc_auth *auth = req->rq_task->tk_auth;
+	struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
 	int replen;
 	int status;
 
@@ -3269,7 +3306,7 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
 static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
 {
         __be32 *p;
-        uint32_t bmlen;
+	uint32_t savewords, bmlen, i;
         int status;
 
         status = decode_op_hdr(xdr, OP_OPEN);
@@ -3287,7 +3324,12 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
                 goto xdr_error;
 
         READ_BUF(bmlen << 2);
-        p += bmlen;
+	savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE);
+	for (i = 0; i < savewords; ++i)
+		READ32(res->attrset[i]);
+	for (; i < NFS4_BITMAP_SIZE; i++)
+		res->attrset[i] = 0;
+
 	return decode_delegation(xdr, res);
 xdr_error:
 	dprintk("%s: Bitmap too large! Length = %u\n", __FUNCTION__, bmlen);
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 49d1008ce1d7..3490322d1145 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -428,7 +428,7 @@ static int __init root_nfs_getport(int program, int version, int proto)
 	printk(KERN_NOTICE "Looking up port of RPC %d/%d on %u.%u.%u.%u\n",
 		program, version, NIPQUAD(servaddr));
 	set_sockaddr(&sin, servaddr, 0);
-	return rpcb_getport_external(&sin, program, version, proto);
+	return rpcb_getport_sync(&sin, program, version, proto);
 }
 
 
@@ -496,7 +496,8 @@ static int __init root_nfs_get_handle(void)
 					NFS_MNT3_VERSION : NFS_MNT_VERSION;
 
 	set_sockaddr(&sin, servaddr, htons(mount_port));
-	status = nfsroot_mount(&sin, nfs_path, &fh, version, protocol);
+	status = nfs_mount((struct sockaddr *) &sin, sizeof(sin), NULL,
+			   nfs_path, version, protocol, &fh);
 	if (status < 0)
 		printk(KERN_ERR "Root-NFS: Server returned error %d "
 				"while mounting %s\n", status, nfs_path);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index c5bb51a29e80..f56dae5216f4 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -85,9 +85,8 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
 	req->wb_offset  = offset;
 	req->wb_pgbase	= offset;
 	req->wb_bytes   = count;
-	atomic_set(&req->wb_count, 1);
 	req->wb_context = get_nfs_open_context(ctx);
-
+	kref_init(&req->wb_kref);
 	return req;
 }
 
@@ -109,30 +108,31 @@ void nfs_unlock_request(struct nfs_page *req)
 }
 
 /**
- * nfs_set_page_writeback_locked - Lock a request for writeback
+ * nfs_set_page_tag_locked - Tag a request as locked
  * @req:
  */
-int nfs_set_page_writeback_locked(struct nfs_page *req)
+static int nfs_set_page_tag_locked(struct nfs_page *req)
 {
-	struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
+	struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode);
 
 	if (!nfs_lock_request(req))
 		return 0;
-	radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
+	radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
 	return 1;
 }
 
 /**
- * nfs_clear_page_writeback - Unlock request and wake up sleepers
+ * nfs_clear_page_tag_locked - Clear request tag and wake up sleepers
  */
-void nfs_clear_page_writeback(struct nfs_page *req)
+void nfs_clear_page_tag_locked(struct nfs_page *req)
 {
-	struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
+	struct inode *inode = req->wb_context->path.dentry->d_inode;
+	struct nfs_inode *nfsi = NFS_I(inode);
 
 	if (req->wb_page != NULL) {
-		spin_lock(&nfsi->req_lock);
-		radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
-		spin_unlock(&nfsi->req_lock);
+		spin_lock(&inode->i_lock);
+		radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
+		spin_unlock(&inode->i_lock);
 	}
 	nfs_unlock_request(req);
 }
@@ -160,11 +160,9 @@ void nfs_clear_request(struct nfs_page *req)
  *
  * Note: Should never be called with the spinlock held!
  */
-void
-nfs_release_request(struct nfs_page *req)
+static void nfs_free_request(struct kref *kref)
 {
-	if (!atomic_dec_and_test(&req->wb_count))
-		return;
+	struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
 
 	/* Release struct file or cached credential */
 	nfs_clear_request(req);
@@ -172,6 +170,11 @@ nfs_release_request(struct nfs_page *req)
 	nfs_page_free(req);
 }
 
+void nfs_release_request(struct nfs_page *req)
+{
+	kref_put(&req->wb_kref, nfs_free_request);
+}
+
 static int nfs_wait_bit_interruptible(void *word)
 {
 	int ret = 0;
@@ -193,7 +196,7 @@ static int nfs_wait_bit_interruptible(void *word)
 int
 nfs_wait_on_request(struct nfs_page *req)
 {
-        struct rpc_clnt	*clnt = NFS_CLIENT(req->wb_context->dentry->d_inode);
+	struct rpc_clnt *clnt = NFS_CLIENT(req->wb_context->path.dentry->d_inode);
 	sigset_t oldmask;
 	int ret = 0;
 
@@ -379,20 +382,20 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
 /**
  * nfs_scan_list - Scan a list for matching requests
  * @nfsi: NFS inode
- * @head: One of the NFS inode request lists
  * @dst: Destination list
  * @idx_start: lower bound of page->index to scan
  * @npages: idx_start + npages sets the upper bound to scan.
+ * @tag: tag to scan for
  *
  * Moves elements from one of the inode request lists.
  * If the number of requests is set to 0, the entire address_space
  * starting at index idx_start, is scanned.
  * The requests are *not* checked to ensure that they form a contiguous set.
- * You must be holding the inode's req_lock when calling this function
+ * You must be holding the inode's i_lock when calling this function
  */
-int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head,
+int nfs_scan_list(struct nfs_inode *nfsi,
 		struct list_head *dst, pgoff_t idx_start,
-		unsigned int npages)
+		unsigned int npages, int tag)
 {
 	struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
 	struct nfs_page *req;
@@ -407,9 +410,9 @@ int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head,
 		idx_end = idx_start + npages - 1;
 
 	for (;;) {
-		found = radix_tree_gang_lookup(&nfsi->nfs_page_tree,
+		found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree,
 				(void **)&pgvec[0], idx_start,
-				NFS_SCAN_MAXENTRIES);
+				NFS_SCAN_MAXENTRIES, tag);
 		if (found <= 0)
 			break;
 		for (i = 0; i < found; i++) {
@@ -417,15 +420,18 @@ int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head,
 			if (req->wb_index > idx_end)
 				goto out;
 			idx_start = req->wb_index + 1;
-			if (req->wb_list_head != head)
-				continue;
-			if (nfs_set_page_writeback_locked(req)) {
+			if (nfs_set_page_tag_locked(req)) {
 				nfs_list_remove_request(req);
+				radix_tree_tag_clear(&nfsi->nfs_page_tree,
+						req->wb_index, tag);
 				nfs_list_add_request(req, dst);
 				res++;
+				if (res == INT_MAX)
+					goto out;
 			}
 		}
-
+		/* for latency reduction */
+		cond_resched_lock(&nfsi->vfs_inode.i_lock);
 	}
 out:
 	return res;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 7bd7cb95c034..6ae2e58ed05a 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -145,8 +145,8 @@ static void nfs_readpage_release(struct nfs_page *req)
 	unlock_page(req->wb_page);
 
 	dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
-			req->wb_context->dentry->d_inode->i_sb->s_id,
-			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+			req->wb_context->path.dentry->d_inode->i_sb->s_id,
+			(long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
 			req->wb_bytes,
 			(long long)req_offset(req));
 	nfs_clear_request(req);
@@ -164,7 +164,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
 	int flags;
 
 	data->req	  = req;
-	data->inode	  = inode = req->wb_context->dentry->d_inode;
+	data->inode	  = inode = req->wb_context->path.dentry->d_inode;
 	data->cred	  = req->wb_context->cred;
 
 	data->args.fh     = NFS_FH(inode);
@@ -483,17 +483,19 @@ int nfs_readpage(struct file *file, struct page *page)
 	 */
 	error = nfs_wb_page(inode, page);
 	if (error)
-		goto out_error;
+		goto out_unlock;
+	if (PageUptodate(page))
+		goto out_unlock;
 
 	error = -ESTALE;
 	if (NFS_STALE(inode))
-		goto out_error;
+		goto out_unlock;
 
 	if (file == NULL) {
 		error = -EBADF;
 		ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
 		if (ctx == NULL)
-			goto out_error;
+			goto out_unlock;
 	} else
 		ctx = get_nfs_open_context((struct nfs_open_context *)
 				file->private_data);
@@ -502,8 +504,7 @@ int nfs_readpage(struct file *file, struct page *page)
 
 	put_nfs_open_context(ctx);
 	return error;
-
-out_error:
+out_unlock:
 	unlock_page(page);
 	return error;
 }
@@ -520,21 +521,32 @@ readpage_async_filler(void *data, struct page *page)
 	struct inode *inode = page->mapping->host;
 	struct nfs_page *new;
 	unsigned int len;
+	int error;
+
+	error = nfs_wb_page(inode, page);
+	if (error)
+		goto out_unlock;
+	if (PageUptodate(page))
+		goto out_unlock;
 
-	nfs_wb_page(inode, page);
 	len = nfs_page_length(page);
 	if (len == 0)
 		return nfs_return_empty_page(page);
+
 	new = nfs_create_request(desc->ctx, inode, page, 0, len);
-	if (IS_ERR(new)) {
-			SetPageError(page);
-			unlock_page(page);
-			return PTR_ERR(new);
-	}
+	if (IS_ERR(new))
+		goto out_error;
+
 	if (len < PAGE_CACHE_SIZE)
 		zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0);
 	nfs_pageio_add_request(desc->pgio, new);
 	return 0;
+out_error:
+	error = PTR_ERR(new);
+	SetPageError(page);
+out_unlock:
+	unlock_page(page);
+	return error;
 }
 
 int nfs_readpages(struct file *filp, struct address_space *mapping,
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ca20d3cc2609..a2b1af89ca1a 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -45,6 +45,7 @@
 #include <linux/inet.h>
 #include <linux/nfs_xdr.h>
 #include <linux/magic.h>
+#include <linux/parser.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -57,6 +58,167 @@
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
 
+
+struct nfs_parsed_mount_data {
+	int			flags;
+	int			rsize, wsize;
+	int			timeo, retrans;
+	int			acregmin, acregmax,
+				acdirmin, acdirmax;
+	int			namlen;
+	unsigned int		bsize;
+	unsigned int		auth_flavor_len;
+	rpc_authflavor_t	auth_flavors[1];
+	char			*client_address;
+
+	struct {
+		struct sockaddr_in	address;
+		unsigned int		program;
+		unsigned int		version;
+		unsigned short		port;
+		int			protocol;
+	} mount_server;
+
+	struct {
+		struct sockaddr_in	address;
+		char			*hostname;
+		char			*export_path;
+		unsigned int		program;
+		int			protocol;
+	} nfs_server;
+};
+
+enum {
+	/* Mount options that take no arguments */
+	Opt_soft, Opt_hard,
+	Opt_intr, Opt_nointr,
+	Opt_posix, Opt_noposix,
+	Opt_cto, Opt_nocto,
+	Opt_ac, Opt_noac,
+	Opt_lock, Opt_nolock,
+	Opt_v2, Opt_v3,
+	Opt_udp, Opt_tcp,
+	Opt_acl, Opt_noacl,
+	Opt_rdirplus, Opt_nordirplus,
+	Opt_sharecache, Opt_nosharecache,
+
+	/* Mount options that take integer arguments */
+	Opt_port,
+	Opt_rsize, Opt_wsize, Opt_bsize,
+	Opt_timeo, Opt_retrans,
+	Opt_acregmin, Opt_acregmax,
+	Opt_acdirmin, Opt_acdirmax,
+	Opt_actimeo,
+	Opt_namelen,
+	Opt_mountport,
+	Opt_mountprog, Opt_mountvers,
+	Opt_nfsprog, Opt_nfsvers,
+
+	/* Mount options that take string arguments */
+	Opt_sec, Opt_proto, Opt_mountproto,
+	Opt_addr, Opt_mounthost, Opt_clientaddr,
+
+	/* Mount options that are ignored */
+	Opt_userspace, Opt_deprecated,
+
+	Opt_err
+};
+
+static match_table_t nfs_mount_option_tokens = {
+	{ Opt_userspace, "bg" },
+	{ Opt_userspace, "fg" },
+	{ Opt_soft, "soft" },
+	{ Opt_hard, "hard" },
+	{ Opt_intr, "intr" },
+	{ Opt_nointr, "nointr" },
+	{ Opt_posix, "posix" },
+	{ Opt_noposix, "noposix" },
+	{ Opt_cto, "cto" },
+	{ Opt_nocto, "nocto" },
+	{ Opt_ac, "ac" },
+	{ Opt_noac, "noac" },
+	{ Opt_lock, "lock" },
+	{ Opt_nolock, "nolock" },
+	{ Opt_v2, "v2" },
+	{ Opt_v3, "v3" },
+	{ Opt_udp, "udp" },
+	{ Opt_tcp, "tcp" },
+	{ Opt_acl, "acl" },
+	{ Opt_noacl, "noacl" },
+	{ Opt_rdirplus, "rdirplus" },
+	{ Opt_nordirplus, "nordirplus" },
+	{ Opt_sharecache, "sharecache" },
+	{ Opt_nosharecache, "nosharecache" },
+
+	{ Opt_port, "port=%u" },
+	{ Opt_rsize, "rsize=%u" },
+	{ Opt_wsize, "wsize=%u" },
+	{ Opt_bsize, "bsize=%u" },
+	{ Opt_timeo, "timeo=%u" },
+	{ Opt_retrans, "retrans=%u" },
+	{ Opt_acregmin, "acregmin=%u" },
+	{ Opt_acregmax, "acregmax=%u" },
+	{ Opt_acdirmin, "acdirmin=%u" },
+	{ Opt_acdirmax, "acdirmax=%u" },
+	{ Opt_actimeo, "actimeo=%u" },
+	{ Opt_userspace, "retry=%u" },
+	{ Opt_namelen, "namlen=%u" },
+	{ Opt_mountport, "mountport=%u" },
+	{ Opt_mountprog, "mountprog=%u" },
+	{ Opt_mountvers, "mountvers=%u" },
+	{ Opt_nfsprog, "nfsprog=%u" },
+	{ Opt_nfsvers, "nfsvers=%u" },
+	{ Opt_nfsvers, "vers=%u" },
+
+	{ Opt_sec, "sec=%s" },
+	{ Opt_proto, "proto=%s" },
+	{ Opt_mountproto, "mountproto=%s" },
+	{ Opt_addr, "addr=%s" },
+	{ Opt_clientaddr, "clientaddr=%s" },
+	{ Opt_mounthost, "mounthost=%s" },
+
+	{ Opt_err, NULL }
+};
+
+enum {
+	Opt_xprt_udp, Opt_xprt_tcp,
+
+	Opt_xprt_err
+};
+
+static match_table_t nfs_xprt_protocol_tokens = {
+	{ Opt_xprt_udp, "udp" },
+	{ Opt_xprt_tcp, "tcp" },
+
+	{ Opt_xprt_err, NULL }
+};
+
+enum {
+	Opt_sec_none, Opt_sec_sys,
+	Opt_sec_krb5, Opt_sec_krb5i, Opt_sec_krb5p,
+	Opt_sec_lkey, Opt_sec_lkeyi, Opt_sec_lkeyp,
+	Opt_sec_spkm, Opt_sec_spkmi, Opt_sec_spkmp,
+
+	Opt_sec_err
+};
+
+static match_table_t nfs_secflavor_tokens = {
+	{ Opt_sec_none, "none" },
+	{ Opt_sec_none, "null" },
+	{ Opt_sec_sys, "sys" },
+
+	{ Opt_sec_krb5, "krb5" },
+	{ Opt_sec_krb5i, "krb5i" },
+	{ Opt_sec_krb5p, "krb5p" },
+
+	{ Opt_sec_lkey, "lkey" },
+	{ Opt_sec_lkeyi, "lkeyi" },
+	{ Opt_sec_lkeyp, "lkeyp" },
+
+	{ Opt_sec_err, NULL }
+};
+
+
 static void nfs_umount_begin(struct vfsmount *, int);
 static int  nfs_statfs(struct dentry *, struct kstatfs *);
 static int  nfs_show_options(struct seq_file *, struct vfsmount *);
@@ -263,11 +425,11 @@ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour)
 		{ RPC_AUTH_GSS_SPKM, "spkm" },
 		{ RPC_AUTH_GSS_SPKMI, "spkmi" },
 		{ RPC_AUTH_GSS_SPKMP, "spkmp" },
-		{ -1, "unknown" }
+		{ UINT_MAX, "unknown" }
 	};
 	int i;
 
-	for (i=0; sec_flavours[i].flavour != -1; i++) {
+	for (i = 0; sec_flavours[i].flavour != UINT_MAX; i++) {
 		if (sec_flavours[i].flavour == flavour)
 			break;
 	}
@@ -291,6 +453,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
 		{ NFS_MOUNT_NONLM, ",nolock", "" },
 		{ NFS_MOUNT_NOACL, ",noacl", "" },
 		{ NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" },
+		{ NFS_MOUNT_UNSHARED, ",nosharecache", ""},
 		{ 0, NULL, NULL }
 	};
 	const struct proc_nfs_info *nfs_infop;
@@ -430,87 +593,641 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
  */
 static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags)
 {
+	struct nfs_server *server = NFS_SB(vfsmnt->mnt_sb);
+	struct rpc_clnt *rpc;
+
 	shrink_submounts(vfsmnt, &nfs_automount_list);
+
+	if (!(flags & MNT_FORCE))
+		return;
+	/* -EIO all pending I/O */
+	rpc = server->client_acl;
+	if (!IS_ERR(rpc))
+		rpc_killall_tasks(rpc);
+	rpc = server->client;
+	if (!IS_ERR(rpc))
+		rpc_killall_tasks(rpc);
 }
 
 /*
- * Validate the NFS2/NFS3 mount data
- * - fills in the mount root filehandle
+ * Sanity-check a server address provided by the mount command
  */
-static int nfs_validate_mount_data(struct nfs_mount_data *data,
-				   struct nfs_fh *mntfh)
+static int nfs_verify_server_address(struct sockaddr *addr)
 {
-	if (data == NULL) {
-		dprintk("%s: missing data argument\n", __FUNCTION__);
-		return -EINVAL;
+	switch (addr->sa_family) {
+	case AF_INET: {
+		struct sockaddr_in *sa = (struct sockaddr_in *) addr;
+		if (sa->sin_addr.s_addr != INADDR_ANY)
+			return 1;
+		break;
+	}
 	}
 
-	if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) {
-		dprintk("%s: bad mount version\n", __FUNCTION__);
-		return -EINVAL;
+	return 0;
+}
+
+/*
+ * Error-check and convert a string of mount options from user space into
+ * a data structure
+ */
+static int nfs_parse_mount_options(char *raw,
+				   struct nfs_parsed_mount_data *mnt)
+{
+	char *p, *string;
+
+	if (!raw) {
+		dfprintk(MOUNT, "NFS: mount options string was NULL.\n");
+		return 1;
 	}
+	dfprintk(MOUNT, "NFS: nfs mount opts='%s'\n", raw);
 
-	switch (data->version) {
-		case 1:
-			data->namlen = 0;
-		case 2:
-			data->bsize  = 0;
-		case 3:
-			if (data->flags & NFS_MOUNT_VER3) {
-				dprintk("%s: mount structure version %d does not support NFSv3\n",
-						__FUNCTION__,
-						data->version);
-				return -EINVAL;
+	while ((p = strsep(&raw, ",")) != NULL) {
+		substring_t args[MAX_OPT_ARGS];
+		int option, token;
+
+		if (!*p)
+			continue;
+
+		dfprintk(MOUNT, "NFS:   parsing nfs mount option '%s'\n", p);
+
+		token = match_token(p, nfs_mount_option_tokens, args);
+		switch (token) {
+		case Opt_soft:
+			mnt->flags |= NFS_MOUNT_SOFT;
+			break;
+		case Opt_hard:
+			mnt->flags &= ~NFS_MOUNT_SOFT;
+			break;
+		case Opt_intr:
+			mnt->flags |= NFS_MOUNT_INTR;
+			break;
+		case Opt_nointr:
+			mnt->flags &= ~NFS_MOUNT_INTR;
+			break;
+		case Opt_posix:
+			mnt->flags |= NFS_MOUNT_POSIX;
+			break;
+		case Opt_noposix:
+			mnt->flags &= ~NFS_MOUNT_POSIX;
+			break;
+		case Opt_cto:
+			mnt->flags &= ~NFS_MOUNT_NOCTO;
+			break;
+		case Opt_nocto:
+			mnt->flags |= NFS_MOUNT_NOCTO;
+			break;
+		case Opt_ac:
+			mnt->flags &= ~NFS_MOUNT_NOAC;
+			break;
+		case Opt_noac:
+			mnt->flags |= NFS_MOUNT_NOAC;
+			break;
+		case Opt_lock:
+			mnt->flags &= ~NFS_MOUNT_NONLM;
+			break;
+		case Opt_nolock:
+			mnt->flags |= NFS_MOUNT_NONLM;
+			break;
+		case Opt_v2:
+			mnt->flags &= ~NFS_MOUNT_VER3;
+			break;
+		case Opt_v3:
+			mnt->flags |= NFS_MOUNT_VER3;
+			break;
+		case Opt_udp:
+			mnt->flags &= ~NFS_MOUNT_TCP;
+			mnt->nfs_server.protocol = IPPROTO_UDP;
+			mnt->timeo = 7;
+			mnt->retrans = 5;
+			break;
+		case Opt_tcp:
+			mnt->flags |= NFS_MOUNT_TCP;
+			mnt->nfs_server.protocol = IPPROTO_TCP;
+			mnt->timeo = 600;
+			mnt->retrans = 2;
+			break;
+		case Opt_acl:
+			mnt->flags &= ~NFS_MOUNT_NOACL;
+			break;
+		case Opt_noacl:
+			mnt->flags |= NFS_MOUNT_NOACL;
+			break;
+		case Opt_rdirplus:
+			mnt->flags &= ~NFS_MOUNT_NORDIRPLUS;
+			break;
+		case Opt_nordirplus:
+			mnt->flags |= NFS_MOUNT_NORDIRPLUS;
+			break;
+		case Opt_sharecache:
+			mnt->flags &= ~NFS_MOUNT_UNSHARED;
+			break;
+		case Opt_nosharecache:
+			mnt->flags |= NFS_MOUNT_UNSHARED;
+			break;
+
+		case Opt_port:
+			if (match_int(args, &option))
+				return 0;
+			if (option < 0 || option > 65535)
+				return 0;
+			mnt->nfs_server.address.sin_port = htonl(option);
+			break;
+		case Opt_rsize:
+			if (match_int(args, &mnt->rsize))
+				return 0;
+			break;
+		case Opt_wsize:
+			if (match_int(args, &mnt->wsize))
+				return 0;
+			break;
+		case Opt_bsize:
+			if (match_int(args, &option))
+				return 0;
+			if (option < 0)
+				return 0;
+			mnt->bsize = option;
+			break;
+		case Opt_timeo:
+			if (match_int(args, &mnt->timeo))
+				return 0;
+			break;
+		case Opt_retrans:
+			if (match_int(args, &mnt->retrans))
+				return 0;
+			break;
+		case Opt_acregmin:
+			if (match_int(args, &mnt->acregmin))
+				return 0;
+			break;
+		case Opt_acregmax:
+			if (match_int(args, &mnt->acregmax))
+				return 0;
+			break;
+		case Opt_acdirmin:
+			if (match_int(args, &mnt->acdirmin))
+				return 0;
+			break;
+		case Opt_acdirmax:
+			if (match_int(args, &mnt->acdirmax))
+				return 0;
+			break;
+		case Opt_actimeo:
+			if (match_int(args, &option))
+				return 0;
+			if (option < 0)
+				return 0;
+			mnt->acregmin =
+			mnt->acregmax =
+			mnt->acdirmin =
+			mnt->acdirmax = option;
+			break;
+		case Opt_namelen:
+			if (match_int(args, &mnt->namlen))
+				return 0;
+			break;
+		case Opt_mountport:
+			if (match_int(args, &option))
+				return 0;
+			if (option < 0 || option > 65535)
+				return 0;
+			mnt->mount_server.port = option;
+			break;
+		case Opt_mountprog:
+			if (match_int(args, &option))
+				return 0;
+			if (option < 0)
+				return 0;
+			mnt->mount_server.program = option;
+			break;
+		case Opt_mountvers:
+			if (match_int(args, &option))
+				return 0;
+			if (option < 0)
+				return 0;
+			mnt->mount_server.version = option;
+			break;
+		case Opt_nfsprog:
+			if (match_int(args, &option))
+				return 0;
+			if (option < 0)
+				return 0;
+			mnt->nfs_server.program = option;
+			break;
+		case Opt_nfsvers:
+			if (match_int(args, &option))
+				return 0;
+			switch (option) {
+			case 2:
+				mnt->flags &= ~NFS_MOUNT_VER3;
+				break;
+			case 3:
+				mnt->flags |= NFS_MOUNT_VER3;
+				break;
+			default:
+				goto out_unrec_vers;
 			}
-			data->root.size = NFS2_FHSIZE;
-			memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
-		case 4:
-			if (data->flags & NFS_MOUNT_SECFLAVOUR) {
-				dprintk("%s: mount structure version %d does not support strong security\n",
-						__FUNCTION__,
-						data->version);
-				return -EINVAL;
+			break;
+
+		case Opt_sec:
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+			token = match_token(string, nfs_secflavor_tokens, args);
+			kfree(string);
+
+			/*
+			 * The flags setting is for v2/v3.  The flavor_len
+			 * setting is for v4.  v2/v3 also need to know the
+			 * difference between NULL and UNIX.
+			 */
+			switch (token) {
+			case Opt_sec_none:
+				mnt->flags &= ~NFS_MOUNT_SECFLAVOUR;
+				mnt->auth_flavor_len = 0;
+				mnt->auth_flavors[0] = RPC_AUTH_NULL;
+				break;
+			case Opt_sec_sys:
+				mnt->flags &= ~NFS_MOUNT_SECFLAVOUR;
+				mnt->auth_flavor_len = 0;
+				mnt->auth_flavors[0] = RPC_AUTH_UNIX;
+				break;
+			case Opt_sec_krb5:
+				mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+				mnt->auth_flavor_len = 1;
+				mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5;
+				break;
+			case Opt_sec_krb5i:
+				mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+				mnt->auth_flavor_len = 1;
+				mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I;
+				break;
+			case Opt_sec_krb5p:
+				mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+				mnt->auth_flavor_len = 1;
+				mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P;
+				break;
+			case Opt_sec_lkey:
+				mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+				mnt->auth_flavor_len = 1;
+				mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY;
+				break;
+			case Opt_sec_lkeyi:
+				mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+				mnt->auth_flavor_len = 1;
+				mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI;
+				break;
+			case Opt_sec_lkeyp:
+				mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+				mnt->auth_flavor_len = 1;
+				mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP;
+				break;
+			case Opt_sec_spkm:
+				mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+				mnt->auth_flavor_len = 1;
+				mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM;
+				break;
+			case Opt_sec_spkmi:
+				mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+				mnt->auth_flavor_len = 1;
+				mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI;
+				break;
+			case Opt_sec_spkmp:
+				mnt->flags |= NFS_MOUNT_SECFLAVOUR;
+				mnt->auth_flavor_len = 1;
+				mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP;
+				break;
+			default:
+				goto out_unrec_sec;
 			}
-		case 5:
-			memset(data->context, 0, sizeof(data->context));
-	}
+			break;
+		case Opt_proto:
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+			token = match_token(string,
+					    nfs_xprt_protocol_tokens, args);
+			kfree(string);
+
+			switch (token) {
+			case Opt_udp:
+				mnt->flags &= ~NFS_MOUNT_TCP;
+				mnt->nfs_server.protocol = IPPROTO_UDP;
+				mnt->timeo = 7;
+				mnt->retrans = 5;
+				break;
+			case Opt_tcp:
+				mnt->flags |= NFS_MOUNT_TCP;
+				mnt->nfs_server.protocol = IPPROTO_TCP;
+				mnt->timeo = 600;
+				mnt->retrans = 2;
+				break;
+			default:
+				goto out_unrec_xprt;
+			}
+			break;
+		case Opt_mountproto:
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+			token = match_token(string,
+					    nfs_xprt_protocol_tokens, args);
+			kfree(string);
+
+			switch (token) {
+			case Opt_udp:
+				mnt->mount_server.protocol = IPPROTO_UDP;
+				break;
+			case Opt_tcp:
+				mnt->mount_server.protocol = IPPROTO_TCP;
+				break;
+			default:
+				goto out_unrec_xprt;
+			}
+			break;
+		case Opt_addr:
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+			mnt->nfs_server.address.sin_family = AF_INET;
+			mnt->nfs_server.address.sin_addr.s_addr =
+							in_aton(string);
+			kfree(string);
+			break;
+		case Opt_clientaddr:
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+			mnt->client_address = string;
+			break;
+		case Opt_mounthost:
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+			mnt->mount_server.address.sin_family = AF_INET;
+			mnt->mount_server.address.sin_addr.s_addr =
+							in_aton(string);
+			kfree(string);
+			break;
 
-	/* Set the pseudoflavor */
-	if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
-		data->pseudoflavor = RPC_AUTH_UNIX;
+		case Opt_userspace:
+		case Opt_deprecated:
+			break;
 
-#ifndef CONFIG_NFS_V3
-	/* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */
-	if (data->flags & NFS_MOUNT_VER3) {
-		dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__);
-		return -EPROTONOSUPPORT;
+		default:
+			goto out_unknown;
+		}
 	}
-#endif /* CONFIG_NFS_V3 */
 
-	/* We now require that the mount process passes the remote address */
-	if (data->addr.sin_addr.s_addr == INADDR_ANY) {
-		dprintk("%s: mount program didn't pass remote address!\n",
-			__FUNCTION__);
-		return -EINVAL;
+	return 1;
+
+out_nomem:
+	printk(KERN_INFO "NFS: not enough memory to parse option\n");
+	return 0;
+
+out_unrec_vers:
+	printk(KERN_INFO "NFS: unrecognized NFS version number\n");
+	return 0;
+
+out_unrec_xprt:
+	printk(KERN_INFO "NFS: unrecognized transport protocol\n");
+	return 0;
+
+out_unrec_sec:
+	printk(KERN_INFO "NFS: unrecognized security flavor\n");
+	return 0;
+
+out_unknown:
+	printk(KERN_INFO "NFS: unknown mount option: %s\n", p);
+	return 0;
+}
+
+/*
+ * Use the remote server's MOUNT service to request the NFS file handle
+ * corresponding to the provided path.
+ */
+static int nfs_try_mount(struct nfs_parsed_mount_data *args,
+			 struct nfs_fh *root_fh)
+{
+	struct sockaddr_in sin;
+	int status;
+
+	if (args->mount_server.version == 0) {
+		if (args->flags & NFS_MOUNT_VER3)
+			args->mount_server.version = NFS_MNT3_VERSION;
+		else
+			args->mount_server.version = NFS_MNT_VERSION;
 	}
 
-	/* Prepare the root filehandle */
-	if (data->flags & NFS_MOUNT_VER3)
-		mntfh->size = data->root.size;
+	/*
+	 * Construct the mount server's address.
+	 */
+	if (args->mount_server.address.sin_addr.s_addr != INADDR_ANY)
+		sin = args->mount_server.address;
 	else
-		mntfh->size = NFS2_FHSIZE;
+		sin = args->nfs_server.address;
+	if (args->mount_server.port == 0) {
+		status = rpcb_getport_sync(&sin,
+					   args->mount_server.program,
+					   args->mount_server.version,
+					   args->mount_server.protocol);
+		if (status < 0)
+			goto out_err;
+		sin.sin_port = htons(status);
+	} else
+		sin.sin_port = htons(args->mount_server.port);
+
+	/*
+	 * Now ask the mount server to map our export path
+	 * to a file handle.
+	 */
+	status = nfs_mount((struct sockaddr *) &sin,
+			   sizeof(sin),
+			   args->nfs_server.hostname,
+			   args->nfs_server.export_path,
+			   args->mount_server.version,
+			   args->mount_server.protocol,
+			   root_fh);
+	if (status < 0)
+		goto out_err;
+
+	return status;
 
-	if (mntfh->size > sizeof(mntfh->data)) {
-		dprintk("%s: invalid root filehandle\n", __FUNCTION__);
-		return -EINVAL;
+out_err:
+	dfprintk(MOUNT, "NFS: unable to contact server on host "
+		 NIPQUAD_FMT "\n", NIPQUAD(sin.sin_addr.s_addr));
+	return status;
+}
+
+/*
+ * Validate the NFS2/NFS3 mount data
+ * - fills in the mount root filehandle
+ *
+ * For option strings, user space handles the following behaviors:
+ *
+ * + DNS: mapping server host name to IP address ("addr=" option)
+ *
+ * + failure mode: how to behave if a mount request can't be handled
+ *   immediately ("fg/bg" option)
+ *
+ * + retry: how often to retry a mount request ("retry=" option)
+ *
+ * + breaking back: trying proto=udp after proto=tcp, v2 after v3,
+ *   mountproto=tcp after mountproto=udp, and so on
+ *
+ * XXX: as far as I can tell, changing the NFS program number is not
+ *      supported in the NFS client.
+ */
+static int nfs_validate_mount_data(struct nfs_mount_data **options,
+				   struct nfs_fh *mntfh,
+				   const char *dev_name)
+{
+	struct nfs_mount_data *data = *options;
+
+	if (data == NULL)
+		goto out_no_data;
+
+	switch (data->version) {
+	case 1:
+		data->namlen = 0;
+	case 2:
+		data->bsize = 0;
+	case 3:
+		if (data->flags & NFS_MOUNT_VER3)
+			goto out_no_v3;
+		data->root.size = NFS2_FHSIZE;
+		memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
+	case 4:
+		if (data->flags & NFS_MOUNT_SECFLAVOUR)
+			goto out_no_sec;
+	case 5:
+		memset(data->context, 0, sizeof(data->context));
+	case 6:
+		if (data->flags & NFS_MOUNT_VER3)
+			mntfh->size = data->root.size;
+		else
+			mntfh->size = NFS2_FHSIZE;
+
+		if (mntfh->size > sizeof(mntfh->data))
+			goto out_invalid_fh;
+
+		memcpy(mntfh->data, data->root.data, mntfh->size);
+		if (mntfh->size < sizeof(mntfh->data))
+			memset(mntfh->data + mntfh->size, 0,
+			       sizeof(mntfh->data) - mntfh->size);
+		break;
+	default: {
+		unsigned int len;
+		char *c;
+		int status;
+		struct nfs_parsed_mount_data args = {
+			.flags		= (NFS_MOUNT_VER3 | NFS_MOUNT_TCP),
+			.rsize		= NFS_MAX_FILE_IO_SIZE,
+			.wsize		= NFS_MAX_FILE_IO_SIZE,
+			.timeo		= 600,
+			.retrans	= 2,
+			.acregmin	= 3,
+			.acregmax	= 60,
+			.acdirmin	= 30,
+			.acdirmax	= 60,
+			.mount_server.protocol = IPPROTO_UDP,
+			.mount_server.program = NFS_MNT_PROGRAM,
+			.nfs_server.protocol = IPPROTO_TCP,
+			.nfs_server.program = NFS_PROGRAM,
+		};
+
+		if (nfs_parse_mount_options((char *) *options, &args) == 0)
+			return -EINVAL;
+
+		data = kzalloc(sizeof(*data), GFP_KERNEL);
+		if (data == NULL)
+			return -ENOMEM;
+
+		/*
+		 * NB: after this point, caller will free "data"
+		 * if we return an error
+		 */
+		*options = data;
+
+		c = strchr(dev_name, ':');
+		if (c == NULL)
+			return -EINVAL;
+		len = c - dev_name - 1;
+		if (len > sizeof(data->hostname))
+			return -EINVAL;
+		strncpy(data->hostname, dev_name, len);
+		args.nfs_server.hostname = data->hostname;
+
+		c++;
+		if (strlen(c) > NFS_MAXPATHLEN)
+			return -EINVAL;
+		args.nfs_server.export_path = c;
+
+		status = nfs_try_mount(&args, mntfh);
+		if (status)
+			return -EINVAL;
+
+		/*
+		 * Translate to nfs_mount_data, which nfs_fill_super
+		 * can deal with.
+		 */
+		data->version		= 6;
+		data->flags		= args.flags;
+		data->rsize		= args.rsize;
+		data->wsize		= args.wsize;
+		data->timeo		= args.timeo;
+		data->retrans		= args.retrans;
+		data->acregmin		= args.acregmin;
+		data->acregmax		= args.acregmax;
+		data->acdirmin		= args.acdirmin;
+		data->acdirmax		= args.acdirmax;
+		data->addr		= args.nfs_server.address;
+		data->namlen		= args.namlen;
+		data->bsize		= args.bsize;
+		data->pseudoflavor	= args.auth_flavors[0];
+
+		break;
+		}
 	}
 
-	memcpy(mntfh->data, data->root.data, mntfh->size);
-	if (mntfh->size < sizeof(mntfh->data))
-		memset(mntfh->data + mntfh->size, 0,
-		       sizeof(mntfh->data) - mntfh->size);
+	if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
+		data->pseudoflavor = RPC_AUTH_UNIX;
+
+#ifndef CONFIG_NFS_V3
+	if (data->flags & NFS_MOUNT_VER3)
+		goto out_v3_not_compiled;
+#endif /* !CONFIG_NFS_V3 */
+
+	if (!nfs_verify_server_address((struct sockaddr *) &data->addr))
+		goto out_no_address;
 
 	return 0;
+
+out_no_data:
+	dfprintk(MOUNT, "NFS: mount program didn't pass any mount data\n");
+	return -EINVAL;
+
+out_no_v3:
+	dfprintk(MOUNT, "NFS: nfs_mount_data version %d does not support v3\n",
+		 data->version);
+	return -EINVAL;
+
+out_no_sec:
+	dfprintk(MOUNT, "NFS: nfs_mount_data version supports only AUTH_SYS\n");
+	return -EINVAL;
+
+#ifndef CONFIG_NFS_V3
+out_v3_not_compiled:
+	dfprintk(MOUNT, "NFS: NFSv3 is not compiled into kernel\n");
+	return -EPROTONOSUPPORT;
+#endif /* !CONFIG_NFS_V3 */
+
+out_no_address:
+	dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n");
+	return -EINVAL;
+
+out_invalid_fh:
+	dfprintk(MOUNT, "NFS: invalid root filehandle\n");
+	return -EINVAL;
 }
 
 /*
@@ -600,13 +1317,51 @@ static int nfs_compare_super(struct super_block *sb, void *data)
 {
 	struct nfs_server *server = data, *old = NFS_SB(sb);
 
-	if (old->nfs_client != server->nfs_client)
+	if (memcmp(&old->nfs_client->cl_addr,
+				&server->nfs_client->cl_addr,
+				sizeof(old->nfs_client->cl_addr)) != 0)
+		return 0;
+	/* Note: NFS_MOUNT_UNSHARED == NFS4_MOUNT_UNSHARED */
+	if (old->flags & NFS_MOUNT_UNSHARED)
 		return 0;
 	if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0)
 		return 0;
 	return 1;
 }
 
+#define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS)
+
+static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags)
+{
+	const struct nfs_server *a = s->s_fs_info;
+	const struct rpc_clnt *clnt_a = a->client;
+	const struct rpc_clnt *clnt_b = b->client;
+
+	if ((s->s_flags & NFS_MS_MASK) != (flags & NFS_MS_MASK))
+		goto Ebusy;
+	if (a->nfs_client != b->nfs_client)
+		goto Ebusy;
+	if (a->flags != b->flags)
+		goto Ebusy;
+	if (a->wsize != b->wsize)
+		goto Ebusy;
+	if (a->rsize != b->rsize)
+		goto Ebusy;
+	if (a->acregmin != b->acregmin)
+		goto Ebusy;
+	if (a->acregmax != b->acregmax)
+		goto Ebusy;
+	if (a->acdirmin != b->acdirmin)
+		goto Ebusy;
+	if (a->acdirmax != b->acdirmax)
+		goto Ebusy;
+	if (clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor)
+		goto Ebusy;
+	return 0;
+Ebusy:
+	return -EBUSY;
+}
+
 static int nfs_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
 {
@@ -615,30 +1370,37 @@ static int nfs_get_sb(struct file_system_type *fs_type,
 	struct nfs_fh mntfh;
 	struct nfs_mount_data *data = raw_data;
 	struct dentry *mntroot;
+	int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
 	int error;
 
 	/* Validate the mount data */
-	error = nfs_validate_mount_data(data, &mntfh);
+	error = nfs_validate_mount_data(&data, &mntfh, dev_name);
 	if (error < 0)
-		return error;
+		goto out;
 
 	/* Get a volume representation */
 	server = nfs_create_server(data, &mntfh);
 	if (IS_ERR(server)) {
 		error = PTR_ERR(server);
-		goto out_err_noserver;
+		goto out;
 	}
 
+	if (server->flags & NFS_MOUNT_UNSHARED)
+		compare_super = NULL;
+
 	/* Get a superblock - note that we may end up sharing one that already exists */
-	s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
+	s = sget(fs_type, compare_super, nfs_set_super, server);
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
 		goto out_err_nosb;
 	}
 
 	if (s->s_fs_info != server) {
+		error = nfs_compare_mount_options(s, server, flags);
 		nfs_free_server(server);
 		server = NULL;
+		if (error < 0)
+			goto error_splat_super;
 	}
 
 	if (!s->s_root) {
@@ -656,17 +1418,21 @@ static int nfs_get_sb(struct file_system_type *fs_type,
 	s->s_flags |= MS_ACTIVE;
 	mnt->mnt_sb = s;
 	mnt->mnt_root = mntroot;
-	return 0;
+	error = 0;
+
+out:
+	if (data != raw_data)
+		kfree(data);
+	return error;
 
 out_err_nosb:
 	nfs_free_server(server);
-out_err_noserver:
-	return error;
+	goto out;
 
 error_splat_super:
 	up_write(&s->s_umount);
 	deactivate_super(s);
-	return error;
+	goto out;
 }
 
 /*
@@ -691,6 +1457,7 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
 	struct super_block *s;
 	struct nfs_server *server;
 	struct dentry *mntroot;
+	int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
 	int error;
 
 	dprintk("--> nfs_xdev_get_sb()\n");
@@ -702,16 +1469,22 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
 		goto out_err_noserver;
 	}
 
+	if (server->flags & NFS_MOUNT_UNSHARED)
+		compare_super = NULL;
+
 	/* Get a superblock - note that we may end up sharing one that already exists */
-	s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
+	s = sget(&nfs_fs_type, compare_super, nfs_set_super, server);
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
 		goto out_err_nosb;
 	}
 
 	if (s->s_fs_info != server) {
+		error = nfs_compare_mount_options(s, server, flags);
 		nfs_free_server(server);
 		server = NULL;
+		if (error < 0)
+			goto error_splat_super;
 	}
 
 	if (!s->s_root) {
@@ -772,25 +1545,164 @@ static void nfs4_fill_super(struct super_block *sb)
 	nfs_initialise_sb(sb);
 }
 
-static void *nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen)
+/*
+ * Validate NFSv4 mount options
+ */
+static int nfs4_validate_mount_data(struct nfs4_mount_data **options,
+				    const char *dev_name,
+				    struct sockaddr_in *addr,
+				    rpc_authflavor_t *authflavour,
+				    char **hostname,
+				    char **mntpath,
+				    char **ip_addr)
 {
-	void *p = NULL;
-
-	if (!src->len)
-		return ERR_PTR(-EINVAL);
-	if (src->len < maxlen)
-		maxlen = src->len;
-	if (dst == NULL) {
-		p = dst = kmalloc(maxlen + 1, GFP_KERNEL);
-		if (p == NULL)
-			return ERR_PTR(-ENOMEM);
-	}
-	if (copy_from_user(dst, src->data, maxlen)) {
-		kfree(p);
-		return ERR_PTR(-EFAULT);
+	struct nfs4_mount_data *data = *options;
+	char *c;
+
+	if (data == NULL)
+		goto out_no_data;
+
+	switch (data->version) {
+	case 1:
+		if (data->host_addrlen != sizeof(*addr))
+			goto out_no_address;
+		if (copy_from_user(addr, data->host_addr, sizeof(*addr)))
+			return -EFAULT;
+		if (addr->sin_port == 0)
+			addr->sin_port = htons(NFS_PORT);
+		if (!nfs_verify_server_address((struct sockaddr *) addr))
+			goto out_no_address;
+
+		switch (data->auth_flavourlen) {
+		case 0:
+			*authflavour = RPC_AUTH_UNIX;
+			break;
+		case 1:
+			if (copy_from_user(authflavour, data->auth_flavours,
+					   sizeof(*authflavour)))
+				return -EFAULT;
+			break;
+		default:
+			goto out_inval_auth;
+		}
+
+		c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN);
+		if (IS_ERR(c))
+			return PTR_ERR(c);
+		*hostname = c;
+
+		c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN);
+		if (IS_ERR(c))
+			return PTR_ERR(c);
+		*mntpath = c;
+		dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *mntpath);
+
+		c = strndup_user(data->client_addr.data, 16);
+		if (IS_ERR(c))
+			return PTR_ERR(c);
+		*ip_addr = c;
+
+		break;
+	default: {
+		unsigned int len;
+		struct nfs_parsed_mount_data args = {
+			.rsize		= NFS_MAX_FILE_IO_SIZE,
+			.wsize		= NFS_MAX_FILE_IO_SIZE,
+			.timeo		= 600,
+			.retrans	= 2,
+			.acregmin	= 3,
+			.acregmax	= 60,
+			.acdirmin	= 30,
+			.acdirmax	= 60,
+			.nfs_server.protocol = IPPROTO_TCP,
+		};
+
+		if (nfs_parse_mount_options((char *) *options, &args) == 0)
+			return -EINVAL;
+
+		if (!nfs_verify_server_address((struct sockaddr *)
+						&args.nfs_server.address))
+			return -EINVAL;
+		*addr = args.nfs_server.address;
+
+		switch (args.auth_flavor_len) {
+		case 0:
+			*authflavour = RPC_AUTH_UNIX;
+			break;
+		case 1:
+			*authflavour = (rpc_authflavor_t) args.auth_flavors[0];
+			break;
+		default:
+			goto out_inval_auth;
+		}
+
+		/*
+		 * Translate to nfs4_mount_data, which nfs4_fill_super
+		 * can deal with.
+		 */
+		data = kzalloc(sizeof(*data), GFP_KERNEL);
+		if (data == NULL)
+			return -ENOMEM;
+		*options = data;
+
+		data->version	= 1;
+		data->flags	= args.flags & NFS4_MOUNT_FLAGMASK;
+		data->rsize	= args.rsize;
+		data->wsize	= args.wsize;
+		data->timeo	= args.timeo;
+		data->retrans	= args.retrans;
+		data->acregmin	= args.acregmin;
+		data->acregmax	= args.acregmax;
+		data->acdirmin	= args.acdirmin;
+		data->acdirmax	= args.acdirmax;
+		data->proto	= args.nfs_server.protocol;
+
+		/*
+		 * Split "dev_name" into "hostname:mntpath".
+		 */
+		c = strchr(dev_name, ':');
+		if (c == NULL)
+			return -EINVAL;
+		/* while calculating len, pretend ':' is '\0' */
+		len = c - dev_name;
+		if (len > NFS4_MAXNAMLEN)
+			return -EINVAL;
+		*hostname = kzalloc(len, GFP_KERNEL);
+		if (*hostname == NULL)
+			return -ENOMEM;
+		strncpy(*hostname, dev_name, len - 1);
+
+		c++;			/* step over the ':' */
+		len = strlen(c);
+		if (len > NFS4_MAXPATHLEN)
+			return -EINVAL;
+		*mntpath = kzalloc(len + 1, GFP_KERNEL);
+		if (*mntpath == NULL)
+			return -ENOMEM;
+		strncpy(*mntpath, c, len);
+
+		dprintk("MNTPATH: %s\n", *mntpath);
+
+		*ip_addr = args.client_address;
+
+		break;
+		}
 	}
-	dst[maxlen] = '\0';
-	return dst;
+
+	return 0;
+
+out_no_data:
+	dfprintk(MOUNT, "NFS4: mount program didn't pass any mount data\n");
+	return -EINVAL;
+
+out_inval_auth:
+	dfprintk(MOUNT, "NFS4: Invalid number of RPC auth flavours %d\n",
+		 data->auth_flavourlen);
+	return -EINVAL;
+
+out_no_address:
+	dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n");
+	return -EINVAL;
 }
 
 /*
@@ -806,81 +1718,29 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
 	rpc_authflavor_t authflavour;
 	struct nfs_fh mntfh;
 	struct dentry *mntroot;
-	char *mntpath = NULL, *hostname = NULL, ip_addr[16];
-	void *p;
+	char *mntpath = NULL, *hostname = NULL, *ip_addr = NULL;
+	int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
 	int error;
 
-	if (data == NULL) {
-		dprintk("%s: missing data argument\n", __FUNCTION__);
-		return -EINVAL;
-	}
-	if (data->version <= 0 || data->version > NFS4_MOUNT_VERSION) {
-		dprintk("%s: bad mount version\n", __FUNCTION__);
-		return -EINVAL;
-	}
-
-	/* We now require that the mount process passes the remote address */
-	if (data->host_addrlen != sizeof(addr))
-		return -EINVAL;
-
-	if (copy_from_user(&addr, data->host_addr, sizeof(addr)))
-		return -EFAULT;
-
-	if (addr.sin_family != AF_INET ||
-	    addr.sin_addr.s_addr == INADDR_ANY
-	    ) {
-		dprintk("%s: mount program didn't pass remote IP address!\n",
-				__FUNCTION__);
-		return -EINVAL;
-	}
-	/* RFC3530: The default port for NFS is 2049 */
-	if (addr.sin_port == 0)
-		addr.sin_port = htons(NFS_PORT);
-
-	/* Grab the authentication type */
-	authflavour = RPC_AUTH_UNIX;
-	if (data->auth_flavourlen != 0) {
-		if (data->auth_flavourlen != 1) {
-			dprintk("%s: Invalid number of RPC auth flavours %d.\n",
-					__FUNCTION__, data->auth_flavourlen);
-			error = -EINVAL;
-			goto out_err_noserver;
-		}
-
-		if (copy_from_user(&authflavour, data->auth_flavours,
-				   sizeof(authflavour))) {
-			error = -EFAULT;
-			goto out_err_noserver;
-		}
-	}
-
-	p = nfs_copy_user_string(NULL, &data->hostname, 256);
-	if (IS_ERR(p))
-		goto out_err;
-	hostname = p;
-
-	p = nfs_copy_user_string(NULL, &data->mnt_path, 1024);
-	if (IS_ERR(p))
-		goto out_err;
-	mntpath = p;
-
-	dprintk("MNTPATH: %s\n", mntpath);
-
-	p = nfs_copy_user_string(ip_addr, &data->client_addr,
-				 sizeof(ip_addr) - 1);
-	if (IS_ERR(p))
-		goto out_err;
+	/* Validate the mount data */
+	error = nfs4_validate_mount_data(&data, dev_name, &addr, &authflavour,
+					 &hostname, &mntpath, &ip_addr);
+	if (error < 0)
+		goto out;
 
 	/* Get a volume representation */
 	server = nfs4_create_server(data, hostname, &addr, mntpath, ip_addr,
 				    authflavour, &mntfh);
 	if (IS_ERR(server)) {
 		error = PTR_ERR(server);
-		goto out_err_noserver;
+		goto out;
 	}
 
+	if (server->flags & NFS4_MOUNT_UNSHARED)
+		compare_super = NULL;
+
 	/* Get a superblock - note that we may end up sharing one that already exists */
-	s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
+	s = sget(fs_type, compare_super, nfs_set_super, server);
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
 		goto out_free;
@@ -906,25 +1766,22 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
 	s->s_flags |= MS_ACTIVE;
 	mnt->mnt_sb = s;
 	mnt->mnt_root = mntroot;
+	error = 0;
+
+out:
+	kfree(ip_addr);
 	kfree(mntpath);
 	kfree(hostname);
-	return 0;
-
-out_err:
-	error = PTR_ERR(p);
-	goto out_err_noserver;
+	return error;
 
 out_free:
 	nfs_free_server(server);
-out_err_noserver:
-	kfree(mntpath);
-	kfree(hostname);
-	return error;
+	goto out;
 
 error_splat_super:
 	up_write(&s->s_umount);
 	deactivate_super(s);
-	goto out_err_noserver;
+	goto out;
 }
 
 static void nfs4_kill_super(struct super_block *sb)
@@ -949,6 +1806,7 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
 	struct super_block *s;
 	struct nfs_server *server;
 	struct dentry *mntroot;
+	int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
 	int error;
 
 	dprintk("--> nfs4_xdev_get_sb()\n");
@@ -960,8 +1818,11 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
 		goto out_err_noserver;
 	}
 
+	if (server->flags & NFS4_MOUNT_UNSHARED)
+		compare_super = NULL;
+
 	/* Get a superblock - note that we may end up sharing one that already exists */
-	s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
+	s = sget(&nfs_fs_type, compare_super, nfs_set_super, server);
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
 		goto out_err_nosb;
@@ -1016,6 +1877,7 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
 	struct nfs_server *server;
 	struct dentry *mntroot;
 	struct nfs_fh mntfh;
+	int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
 	int error;
 
 	dprintk("--> nfs4_referral_get_sb()\n");
@@ -1027,8 +1889,11 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
 		goto out_err_noserver;
 	}
 
+	if (server->flags & NFS4_MOUNT_UNSHARED)
+		compare_super = NULL;
+
 	/* Get a superblock - note that we may end up sharing one that already exists */
-	s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
+	s = sget(&nfs_fs_type, compare_super, nfs_set_super, server);
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
 		goto out_err_nosb;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index af344a158e01..73ac992ece85 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -117,19 +117,19 @@ static struct nfs_page *nfs_page_find_request_locked(struct page *page)
 	if (PagePrivate(page)) {
 		req = (struct nfs_page *)page_private(page);
 		if (req != NULL)
-			atomic_inc(&req->wb_count);
+			kref_get(&req->wb_kref);
 	}
 	return req;
 }
 
 static struct nfs_page *nfs_page_find_request(struct page *page)
 {
+	struct inode *inode = page->mapping->host;
 	struct nfs_page *req = NULL;
-	spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock;
 
-	spin_lock(req_lock);
+	spin_lock(&inode->i_lock);
 	req = nfs_page_find_request_locked(page);
-	spin_unlock(req_lock);
+	spin_unlock(&inode->i_lock);
 	return req;
 }
 
@@ -191,8 +191,6 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
 	}
 	/* Update file length */
 	nfs_grow_file(page, offset, count);
-	/* Set the PG_uptodate flag? */
-	nfs_mark_uptodate(page, offset, count);
 	nfs_unlock_request(req);
 	return 0;
 }
@@ -253,16 +251,16 @@ static void nfs_end_page_writeback(struct page *page)
 static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
 				struct page *page)
 {
+	struct inode *inode = page->mapping->host;
+	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_page *req;
-	struct nfs_inode *nfsi = NFS_I(page->mapping->host);
-	spinlock_t *req_lock = &nfsi->req_lock;
 	int ret;
 
-	spin_lock(req_lock);
+	spin_lock(&inode->i_lock);
 	for(;;) {
 		req = nfs_page_find_request_locked(page);
 		if (req == NULL) {
-			spin_unlock(req_lock);
+			spin_unlock(&inode->i_lock);
 			return 1;
 		}
 		if (nfs_lock_request_dontget(req))
@@ -272,28 +270,28 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
 		 *	 succeed provided that someone hasn't already marked the
 		 *	 request as dirty (in which case we don't care).
 		 */
-		spin_unlock(req_lock);
+		spin_unlock(&inode->i_lock);
 		ret = nfs_wait_on_request(req);
 		nfs_release_request(req);
 		if (ret != 0)
 			return ret;
-		spin_lock(req_lock);
+		spin_lock(&inode->i_lock);
 	}
 	if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) {
 		/* This request is marked for commit */
-		spin_unlock(req_lock);
+		spin_unlock(&inode->i_lock);
 		nfs_unlock_request(req);
 		nfs_pageio_complete(pgio);
 		return 1;
 	}
 	if (nfs_set_page_writeback(page) != 0) {
-		spin_unlock(req_lock);
+		spin_unlock(&inode->i_lock);
 		BUG();
 	}
 	radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
-			NFS_PAGE_TAG_WRITEBACK);
+			NFS_PAGE_TAG_LOCKED);
 	ret = test_bit(PG_NEED_FLUSH, &req->wb_flags);
-	spin_unlock(req_lock);
+	spin_unlock(&inode->i_lock);
 	nfs_pageio_add_request(pgio, req);
 	return ret;
 }
@@ -400,7 +398,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
 	if (PageDirty(req->wb_page))
 		set_bit(PG_NEED_FLUSH, &req->wb_flags);
 	nfsi->npages++;
-	atomic_inc(&req->wb_count);
+	kref_get(&req->wb_kref);
 	return 0;
 }
 
@@ -409,12 +407,12 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
  */
 static void nfs_inode_remove_request(struct nfs_page *req)
 {
-	struct inode *inode = req->wb_context->dentry->d_inode;
+	struct inode *inode = req->wb_context->path.dentry->d_inode;
 	struct nfs_inode *nfsi = NFS_I(inode);
 
 	BUG_ON (!NFS_WBACK_BUSY(req));
 
-	spin_lock(&nfsi->req_lock);
+	spin_lock(&inode->i_lock);
 	set_page_private(req->wb_page, 0);
 	ClearPagePrivate(req->wb_page);
 	radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
@@ -422,11 +420,11 @@ static void nfs_inode_remove_request(struct nfs_page *req)
 		__set_page_dirty_nobuffers(req->wb_page);
 	nfsi->npages--;
 	if (!nfsi->npages) {
-		spin_unlock(&nfsi->req_lock);
+		spin_unlock(&inode->i_lock);
 		nfs_end_data_update(inode);
 		iput(inode);
 	} else
-		spin_unlock(&nfsi->req_lock);
+		spin_unlock(&inode->i_lock);
 	nfs_clear_request(req);
 	nfs_release_request(req);
 }
@@ -457,14 +455,16 @@ nfs_dirty_request(struct nfs_page *req)
 static void
 nfs_mark_request_commit(struct nfs_page *req)
 {
-	struct inode *inode = req->wb_context->dentry->d_inode;
+	struct inode *inode = req->wb_context->path.dentry->d_inode;
 	struct nfs_inode *nfsi = NFS_I(inode);
 
-	spin_lock(&nfsi->req_lock);
-	nfs_list_add_request(req, &nfsi->commit);
+	spin_lock(&inode->i_lock);
 	nfsi->ncommit++;
 	set_bit(PG_NEED_COMMIT, &(req)->wb_flags);
-	spin_unlock(&nfsi->req_lock);
+	radix_tree_tag_set(&nfsi->nfs_page_tree,
+			req->wb_index,
+			NFS_PAGE_TAG_COMMIT);
+	spin_unlock(&inode->i_lock);
 	inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
 	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
 }
@@ -526,18 +526,18 @@ static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, u
 		idx_end = idx_start + npages - 1;
 
 	next = idx_start;
-	while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) {
+	while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_LOCKED)) {
 		if (req->wb_index > idx_end)
 			break;
 
 		next = req->wb_index + 1;
 		BUG_ON(!NFS_WBACK_BUSY(req));
 
-		atomic_inc(&req->wb_count);
-		spin_unlock(&nfsi->req_lock);
+		kref_get(&req->wb_kref);
+		spin_unlock(&inode->i_lock);
 		error = nfs_wait_on_request(req);
 		nfs_release_request(req);
-		spin_lock(&nfsi->req_lock);
+		spin_lock(&inode->i_lock);
 		if (error < 0)
 			return error;
 		res++;
@@ -577,10 +577,9 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, u
 	int res = 0;
 
 	if (nfsi->ncommit != 0) {
-		res = nfs_scan_list(nfsi, &nfsi->commit, dst, idx_start, npages);
+		res = nfs_scan_list(nfsi, dst, idx_start, npages,
+				NFS_PAGE_TAG_COMMIT);
 		nfsi->ncommit -= res;
-		if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit))
-			printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n");
 	}
 	return res;
 }
@@ -603,7 +602,6 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
 {
 	struct address_space *mapping = page->mapping;
 	struct inode *inode = mapping->host;
-	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_page		*req, *new = NULL;
 	pgoff_t		rqend, end;
 
@@ -613,13 +611,13 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
 		/* Loop over all inode entries and see if we find
 		 * A request for the page we wish to update
 		 */
-		spin_lock(&nfsi->req_lock);
+		spin_lock(&inode->i_lock);
 		req = nfs_page_find_request_locked(page);
 		if (req) {
 			if (!nfs_lock_request_dontget(req)) {
 				int error;
 
-				spin_unlock(&nfsi->req_lock);
+				spin_unlock(&inode->i_lock);
 				error = nfs_wait_on_request(req);
 				nfs_release_request(req);
 				if (error < 0) {
@@ -629,7 +627,7 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
 				}
 				continue;
 			}
-			spin_unlock(&nfsi->req_lock);
+			spin_unlock(&inode->i_lock);
 			if (new)
 				nfs_release_request(new);
 			break;
@@ -640,14 +638,14 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
 			nfs_lock_request_dontget(new);
 			error = nfs_inode_add_request(inode, new);
 			if (error) {
-				spin_unlock(&nfsi->req_lock);
+				spin_unlock(&inode->i_lock);
 				nfs_unlock_request(new);
 				return ERR_PTR(error);
 			}
-			spin_unlock(&nfsi->req_lock);
+			spin_unlock(&inode->i_lock);
 			return new;
 		}
-		spin_unlock(&nfsi->req_lock);
+		spin_unlock(&inode->i_lock);
 
 		new = nfs_create_request(ctx, inode, page, offset, bytes);
 		if (IS_ERR(new))
@@ -751,12 +749,17 @@ int nfs_updatepage(struct file *file, struct page *page,
 static void nfs_writepage_release(struct nfs_page *req)
 {
 
-	if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) {
+	if (PageError(req->wb_page)) {
+		nfs_end_page_writeback(req->wb_page);
+		nfs_inode_remove_request(req);
+	} else if (!nfs_reschedule_unstable_write(req)) {
+		/* Set the PG_uptodate flag */
+		nfs_mark_uptodate(req->wb_page, req->wb_pgbase, req->wb_bytes);
 		nfs_end_page_writeback(req->wb_page);
 		nfs_inode_remove_request(req);
 	} else
 		nfs_end_page_writeback(req->wb_page);
-	nfs_clear_page_writeback(req);
+	nfs_clear_page_tag_locked(req);
 }
 
 static inline int flush_task_priority(int how)
@@ -786,7 +789,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
 	 * NB: take care not to mess about with data->commit et al. */
 
 	data->req = req;
-	data->inode = inode = req->wb_context->dentry->d_inode;
+	data->inode = inode = req->wb_context->path.dentry->d_inode;
 	data->cred = req->wb_context->cred;
 
 	data->args.fh     = NFS_FH(inode);
@@ -885,7 +888,7 @@ out_bad:
 	}
 	nfs_redirty_request(req);
 	nfs_end_page_writeback(req->wb_page);
-	nfs_clear_page_writeback(req);
+	nfs_clear_page_tag_locked(req);
 	return -ENOMEM;
 }
 
@@ -928,7 +931,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
 		nfs_list_remove_request(req);
 		nfs_redirty_request(req);
 		nfs_end_page_writeback(req->wb_page);
-		nfs_clear_page_writeback(req);
+		nfs_clear_page_tag_locked(req);
 	}
 	return -ENOMEM;
 }
@@ -954,8 +957,8 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
 	struct page		*page = req->wb_page;
 
 	dprintk("NFS: write (%s/%Ld %d@%Ld)",
-		req->wb_context->dentry->d_inode->i_sb->s_id,
-		(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+		req->wb_context->path.dentry->d_inode->i_sb->s_id,
+		(long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
 		req->wb_bytes,
 		(long long)req_offset(req));
 
@@ -970,9 +973,9 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
 	}
 
 	if (nfs_write_need_commit(data)) {
-		spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock;
+		struct inode *inode = page->mapping->host;
 
-		spin_lock(req_lock);
+		spin_lock(&inode->i_lock);
 		if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) {
 			/* Do nothing we need to resend the writes */
 		} else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) {
@@ -983,7 +986,7 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
 			clear_bit(PG_NEED_COMMIT, &req->wb_flags);
 			dprintk(" server reboot detected\n");
 		}
-		spin_unlock(req_lock);
+		spin_unlock(&inode->i_lock);
 	} else
 		dprintk(" OK\n");
 
@@ -1020,8 +1023,8 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
 		page = req->wb_page;
 
 		dprintk("NFS: write (%s/%Ld %d@%Ld)",
-			req->wb_context->dentry->d_inode->i_sb->s_id,
-			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+			req->wb_context->path.dentry->d_inode->i_sb->s_id,
+			(long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
 			req->wb_bytes,
 			(long long)req_offset(req));
 
@@ -1039,12 +1042,14 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
 			dprintk(" marked for commit\n");
 			goto next;
 		}
+		/* Set the PG_uptodate flag? */
+		nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
 		dprintk(" OK\n");
 remove_request:
 		nfs_end_page_writeback(page);
 		nfs_inode_remove_request(req);
 	next:
-		nfs_clear_page_writeback(req);
+		nfs_clear_page_tag_locked(req);
 	}
 }
 
@@ -1157,7 +1162,7 @@ static void nfs_commit_rpcsetup(struct list_head *head,
 
 	list_splice_init(head, &data->pages);
 	first = nfs_list_entry(data->pages.next);
-	inode = first->wb_context->dentry->d_inode;
+	inode = first->wb_context->path.dentry->d_inode;
 
 	data->inode	  = inode;
 	data->cred	  = first->wb_context->cred;
@@ -1207,7 +1212,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
 		nfs_list_remove_request(req);
 		nfs_mark_request_commit(req);
 		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
-		nfs_clear_page_writeback(req);
+		nfs_clear_page_tag_locked(req);
 	}
 	return -ENOMEM;
 }
@@ -1234,8 +1239,8 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
 		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
 
 		dprintk("NFS: commit (%s/%Ld %d@%Ld)",
-			req->wb_context->dentry->d_inode->i_sb->s_id,
-			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+			req->wb_context->path.dentry->d_inode->i_sb->s_id,
+			(long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
 			req->wb_bytes,
 			(long long)req_offset(req));
 		if (task->tk_status < 0) {
@@ -1249,6 +1254,9 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
 		 * returned by the server against all stored verfs. */
 		if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {
 			/* We have a match */
+			/* Set the PG_uptodate flag */
+			nfs_mark_uptodate(req->wb_page, req->wb_pgbase,
+					req->wb_bytes);
 			nfs_inode_remove_request(req);
 			dprintk(" OK\n");
 			goto next;
@@ -1257,7 +1265,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
 		dprintk(" mismatch\n");
 		nfs_redirty_request(req);
 	next:
-		nfs_clear_page_writeback(req);
+		nfs_clear_page_tag_locked(req);
 	}
 }
 
@@ -1268,13 +1276,12 @@ static const struct rpc_call_ops nfs_commit_ops = {
 
 int nfs_commit_inode(struct inode *inode, int how)
 {
-	struct nfs_inode *nfsi = NFS_I(inode);
 	LIST_HEAD(head);
 	int res;
 
-	spin_lock(&nfsi->req_lock);
+	spin_lock(&inode->i_lock);
 	res = nfs_scan_commit(inode, &head, 0, 0);
-	spin_unlock(&nfsi->req_lock);
+	spin_unlock(&inode->i_lock);
 	if (res) {
 		int error = nfs_commit_list(inode, &head, how);
 		if (error < 0)
@@ -1292,7 +1299,6 @@ static inline int nfs_commit_list(struct inode *inode, struct list_head *head, i
 long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how)
 {
 	struct inode *inode = mapping->host;
-	struct nfs_inode *nfsi = NFS_I(inode);
 	pgoff_t idx_start, idx_end;
 	unsigned int npages = 0;
 	LIST_HEAD(head);
@@ -1314,7 +1320,7 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
 		}
 	}
 	how &= ~FLUSH_NOCOMMIT;
-	spin_lock(&nfsi->req_lock);
+	spin_lock(&inode->i_lock);
 	do {
 		ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
 		if (ret != 0)
@@ -1325,18 +1331,19 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr
 		if (pages == 0)
 			break;
 		if (how & FLUSH_INVALIDATE) {
-			spin_unlock(&nfsi->req_lock);
+			spin_unlock(&inode->i_lock);
 			nfs_cancel_commit_list(&head);
 			ret = pages;
-			spin_lock(&nfsi->req_lock);
+			spin_lock(&inode->i_lock);
 			continue;
 		}
 		pages += nfs_scan_commit(inode, &head, 0, 0);
-		spin_unlock(&nfsi->req_lock);
+		spin_unlock(&inode->i_lock);
 		ret = nfs_commit_list(inode, &head, how);
-		spin_lock(&nfsi->req_lock);
+		spin_lock(&inode->i_lock);
+
 	} while (ret >= 0);
-	spin_unlock(&nfsi->req_lock);
+	spin_unlock(&inode->i_lock);
 	return ret;
 }
 
@@ -1430,7 +1437,6 @@ int nfs_set_page_dirty(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
 	struct inode *inode;
-	spinlock_t *req_lock;
 	struct nfs_page *req;
 	int ret;
 
@@ -1439,18 +1445,17 @@ int nfs_set_page_dirty(struct page *page)
 	inode = mapping->host;
 	if (!inode)
 		goto out_raced;
-	req_lock = &NFS_I(inode)->req_lock;
-	spin_lock(req_lock);
+	spin_lock(&inode->i_lock);
 	req = nfs_page_find_request_locked(page);
 	if (req != NULL) {
 		/* Mark any existing write requests for flushing */
 		ret = !test_and_set_bit(PG_NEED_FLUSH, &req->wb_flags);
-		spin_unlock(req_lock);
+		spin_unlock(&inode->i_lock);
 		nfs_release_request(req);
 		return ret;
 	}
 	ret = __set_page_dirty_nobuffers(page);
-	spin_unlock(req_lock);
+	spin_unlock(&inode->i_lock);
 	return ret;
 out_raced:
 	return !TestSetPageDirty(page);
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 864090edc28b..5443c52b57aa 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -394,7 +394,6 @@ nfsd4_probe_callback(struct nfs4_client *clp)
 		.rpc_proc       = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
 		.rpc_argp       = clp,
 	};
-	char clientname[16];
 	int status;
 
 	if (atomic_read(&cb->cb_set))
@@ -417,11 +416,6 @@ nfsd4_probe_callback(struct nfs4_client *clp)
 	memset(program->stats, 0, sizeof(cb->cb_stat));
 	program->stats->program = program;
 
-	/* Just here to make some printk's more useful: */
-	snprintf(clientname, sizeof(clientname),
-		"%u.%u.%u.%u", NIPQUAD(addr.sin_addr));
-	args.servername = clientname;
-
 	/* Create RPC client */
 	cb->cb_client = rpc_create(&args);
 	if (IS_ERR(cb->cb_client)) {
@@ -429,29 +423,23 @@ nfsd4_probe_callback(struct nfs4_client *clp)
 		goto out_err;
 	}
 
-	/* Kick rpciod, put the call on the wire. */
-	if (rpciod_up() != 0)
-		goto out_clnt;
-
 	/* the task holds a reference to the nfs4_client struct */
 	atomic_inc(&clp->cl_count);
 
 	msg.rpc_cred = nfsd4_lookupcred(clp,0);
 	if (IS_ERR(msg.rpc_cred))
-		goto out_rpciod;
+		goto out_release_clp;
 	status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL);
 	put_rpccred(msg.rpc_cred);
 
 	if (status != 0) {
 		dprintk("NFSD: asynchronous NFSPROC4_CB_NULL failed!\n");
-		goto out_rpciod;
+		goto out_release_clp;
 	}
 	return;
 
-out_rpciod:
+out_release_clp:
 	atomic_dec(&clp->cl_count);
-	rpciod_down();
-out_clnt:
 	rpc_shutdown_client(cb->cb_client);
 out_err:
 	cb->cb_client = NULL;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3cc8ce422ab1..8c52913d7cb6 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -378,7 +378,6 @@ shutdown_callback_client(struct nfs4_client *clp)
 	if (clnt) {
 		clp->cl_callback.cb_client = NULL;
 		rpc_shutdown_client(clnt);
-		rpciod_down();
 	}
 }
 
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 8604e35bd48e..945b1cedde2b 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -879,6 +879,7 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 			.u.data		= rqstp,
 		};
 
+		rqstp->rq_resused = 1;
 		host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
 	} else {
 		oldfs = get_fs();
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index 2b205f5d5790..e9e042b93dbf 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -74,7 +74,6 @@ struct mlog_attribute {
 #define define_mask(_name) {			\
 	.attr = {				\
 		.name = #_name,			\
-		.owner = THIS_MODULE,		\
 		.mode = S_IRUGO | S_IWUSR,	\
 	},					\
 	.mask = ML_##_name,			\
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 9a3a058f3553..98e0b85a9bb2 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -397,7 +397,6 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len,
 		static struct attribute addpartattr = {
 			.name = "whole_disk",
 			.mode = S_IRUSR | S_IRGRP | S_IROTH,
-			.owner = THIS_MODULE,
 		};
 
 		sysfs_create_file(&p->kobj, &addpartattr);
diff --git a/fs/splice.c b/fs/splice.c
index ed2ce995475c..6c9828651e6f 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -28,6 +28,7 @@
 #include <linux/module.h>
 #include <linux/syscalls.h>
 #include <linux/uio.h>
+#include <linux/security.h>
 
 /*
  * Attempt to steal a page from a pipe buffer. This should perhaps go into
@@ -491,7 +492,7 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
 
 	ret = 0;
 	spliced = 0;
-	while (len) {
+	while (len && !spliced) {
 		ret = __generic_file_splice_read(in, ppos, pipe, len, flags);
 
 		if (ret < 0)
@@ -961,6 +962,10 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
 	if (unlikely(ret < 0))
 		return ret;
 
+	ret = security_file_permission(out, MAY_WRITE);
+	if (unlikely(ret < 0))
+		return ret;
+
 	return out->f_op->splice_write(pipe, out, ppos, len, flags);
 }
 
@@ -983,6 +988,10 @@ static long do_splice_to(struct file *in, loff_t *ppos,
 	if (unlikely(ret < 0))
 		return ret;
 
+	ret = security_file_permission(in, MAY_READ);
+	if (unlikely(ret < 0))
+		return ret;
+
 	return in->f_op->splice_read(in, ppos, pipe, len, flags);
 }
 
@@ -1051,15 +1060,10 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
 	sd->flags &= ~SPLICE_F_NONBLOCK;
 
 	while (len) {
-		size_t read_len, max_read_len;
-
-		/*
-		 * Do at most PIPE_BUFFERS pages worth of transfer:
-		 */
-		max_read_len = min(len, (size_t)(PIPE_BUFFERS*PAGE_SIZE));
+		size_t read_len;
 
-		ret = do_splice_to(in, &sd->pos, pipe, max_read_len, flags);
-		if (unlikely(ret < 0))
+		ret = do_splice_to(in, &sd->pos, pipe, len, flags);
+		if (unlikely(ret <= 0))
 			goto out_release;
 
 		read_len = ret;
@@ -1071,26 +1075,17 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
 		 * could get stuck data in the internal pipe:
 		 */
 		ret = actor(pipe, sd);
-		if (unlikely(ret < 0))
+		if (unlikely(ret <= 0))
 			goto out_release;
 
 		bytes += ret;
 		len -= ret;
 
-		/*
-		 * In nonblocking mode, if we got back a short read then
-		 * that was due to either an IO error or due to the
-		 * pagecache entry not being there. In the IO error case
-		 * the _next_ splice attempt will produce a clean IO error
-		 * return value (not a short read), so in both cases it's
-		 * correct to break out of the loop here:
-		 */
-		if ((flags & SPLICE_F_NONBLOCK) && (read_len < max_read_len))
-			break;
+		if (ret < read_len)
+			goto out_release;
 	}
 
 	pipe->nrbufs = pipe->curbuf = 0;
-
 	return bytes;
 
 out_release:
@@ -1152,10 +1147,12 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
 		.pos		= *ppos,
 		.u.file		= out,
 	};
-	size_t ret;
+	long ret;
 
 	ret = splice_direct_to_actor(in, &sd, direct_splice_actor);
-	*ppos = sd.pos;
+	if (ret > 0)
+		*ppos += ret;
+
 	return ret;
 }
 
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index d3b9f5f07db1..135353f8a296 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -20,29 +20,41 @@
 
 #include "sysfs.h"
 
+struct bin_buffer {
+	struct mutex	mutex;
+	void		*buffer;
+	int		mmapped;
+};
+
 static int
 fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count)
 {
-	struct bin_attribute * attr = to_bin_attr(dentry);
-	struct kobject * kobj = to_kobj(dentry->d_parent);
+	struct sysfs_dirent *attr_sd = dentry->d_fsdata;
+	struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
+	struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
+	int rc;
+
+	/* need attr_sd for attr, its parent for kobj */
+	if (!sysfs_get_active_two(attr_sd))
+		return -ENODEV;
 
-	if (!attr->read)
-		return -EIO;
+	rc = -EIO;
+	if (attr->read)
+		rc = attr->read(kobj, attr, buffer, off, count);
 
-	return attr->read(kobj, buffer, off, count);
+	sysfs_put_active_two(attr_sd);
+
+	return rc;
 }
 
 static ssize_t
-read(struct file * file, char __user * userbuf, size_t count, loff_t * off)
+read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off)
 {
-	char *buffer = file->private_data;
+	struct bin_buffer *bb = file->private_data;
 	struct dentry *dentry = file->f_path.dentry;
 	int size = dentry->d_inode->i_size;
 	loff_t offs = *off;
-	int ret;
-
-	if (count > PAGE_SIZE)
-		count = PAGE_SIZE;
+	int count = min_t(size_t, bytes, PAGE_SIZE);
 
 	if (size) {
 		if (offs > size)
@@ -51,43 +63,56 @@ read(struct file * file, char __user * userbuf, size_t count, loff_t * off)
 			count = size - offs;
 	}
 
-	ret = fill_read(dentry, buffer, offs, count);
-	if (ret < 0) 
-		return ret;
-	count = ret;
+	mutex_lock(&bb->mutex);
+
+	count = fill_read(dentry, bb->buffer, offs, count);
+	if (count < 0)
+		goto out_unlock;
 
-	if (copy_to_user(userbuf, buffer, count))
-		return -EFAULT;
+	if (copy_to_user(userbuf, bb->buffer, count)) {
+		count = -EFAULT;
+		goto out_unlock;
+	}
 
-	pr_debug("offs = %lld, *off = %lld, count = %zd\n", offs, *off, count);
+	pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count);
 
 	*off = offs + count;
 
+ out_unlock:
+	mutex_unlock(&bb->mutex);
 	return count;
 }
 
 static int
 flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count)
 {
-	struct bin_attribute *attr = to_bin_attr(dentry);
-	struct kobject *kobj = to_kobj(dentry->d_parent);
+	struct sysfs_dirent *attr_sd = dentry->d_fsdata;
+	struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
+	struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
+	int rc;
+
+	/* need attr_sd for attr, its parent for kobj */
+	if (!sysfs_get_active_two(attr_sd))
+		return -ENODEV;
+
+	rc = -EIO;
+	if (attr->write)
+		rc = attr->write(kobj, attr, buffer, offset, count);
 
-	if (!attr->write)
-		return -EIO;
+	sysfs_put_active_two(attr_sd);
 
-	return attr->write(kobj, buffer, offset, count);
+	return rc;
 }
 
-static ssize_t write(struct file * file, const char __user * userbuf,
-		     size_t count, loff_t * off)
+static ssize_t write(struct file *file, const char __user *userbuf,
+		     size_t bytes, loff_t *off)
 {
-	char *buffer = file->private_data;
+	struct bin_buffer *bb = file->private_data;
 	struct dentry *dentry = file->f_path.dentry;
 	int size = dentry->d_inode->i_size;
 	loff_t offs = *off;
+	int count = min_t(size_t, bytes, PAGE_SIZE);
 
-	if (count > PAGE_SIZE)
-		count = PAGE_SIZE;
 	if (size) {
 		if (offs > size)
 			return 0;
@@ -95,72 +120,100 @@ static ssize_t write(struct file * file, const char __user * userbuf,
 			count = size - offs;
 	}
 
-	if (copy_from_user(buffer, userbuf, count))
-		return -EFAULT;
+	mutex_lock(&bb->mutex);
 
-	count = flush_write(dentry, buffer, offs, count);
+	if (copy_from_user(bb->buffer, userbuf, count)) {
+		count = -EFAULT;
+		goto out_unlock;
+	}
+
+	count = flush_write(dentry, bb->buffer, offs, count);
 	if (count > 0)
 		*off = offs + count;
+
+ out_unlock:
+	mutex_unlock(&bb->mutex);
 	return count;
 }
 
 static int mmap(struct file *file, struct vm_area_struct *vma)
 {
-	struct dentry *dentry = file->f_path.dentry;
-	struct bin_attribute *attr = to_bin_attr(dentry);
-	struct kobject *kobj = to_kobj(dentry->d_parent);
+	struct bin_buffer *bb = file->private_data;
+	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+	struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
+	struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
+	int rc;
+
+	mutex_lock(&bb->mutex);
+
+	/* need attr_sd for attr, its parent for kobj */
+	if (!sysfs_get_active_two(attr_sd))
+		return -ENODEV;
 
-	if (!attr->mmap)
-		return -EINVAL;
+	rc = -EINVAL;
+	if (attr->mmap)
+		rc = attr->mmap(kobj, attr, vma);
 
-	return attr->mmap(kobj, attr, vma);
+	if (rc == 0 && !bb->mmapped)
+		bb->mmapped = 1;
+	else
+		sysfs_put_active_two(attr_sd);
+
+	mutex_unlock(&bb->mutex);
+
+	return rc;
 }
 
 static int open(struct inode * inode, struct file * file)
 {
-	struct kobject *kobj = sysfs_get_kobject(file->f_path.dentry->d_parent);
-	struct bin_attribute * attr = to_bin_attr(file->f_path.dentry);
-	int error = -EINVAL;
-
-	if (!kobj || !attr)
-		goto Done;
+	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+	struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
+	struct bin_buffer *bb = NULL;
+	int error;
 
-	/* Grab the module reference for this attribute if we have one */
-	error = -ENODEV;
-	if (!try_module_get(attr->attr.owner)) 
-		goto Done;
+	/* need attr_sd for attr */
+	if (!sysfs_get_active(attr_sd))
+		return -ENODEV;
 
 	error = -EACCES;
 	if ((file->f_mode & FMODE_WRITE) && !(attr->write || attr->mmap))
-		goto Error;
+		goto err_out;
 	if ((file->f_mode & FMODE_READ) && !(attr->read || attr->mmap))
-		goto Error;
+		goto err_out;
 
 	error = -ENOMEM;
-	file->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
-	if (!file->private_data)
-		goto Error;
-
-	error = 0;
-    goto Done;
-
- Error:
-	module_put(attr->attr.owner);
- Done:
-	if (error)
-		kobject_put(kobj);
+	bb = kzalloc(sizeof(*bb), GFP_KERNEL);
+	if (!bb)
+		goto err_out;
+
+	bb->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!bb->buffer)
+		goto err_out;
+
+	mutex_init(&bb->mutex);
+	file->private_data = bb;
+
+	/* open succeeded, put active reference and pin attr_sd */
+	sysfs_put_active(attr_sd);
+	sysfs_get(attr_sd);
+	return 0;
+
+ err_out:
+	sysfs_put_active(attr_sd);
+	kfree(bb);
 	return error;
 }
 
 static int release(struct inode * inode, struct file * file)
 {
-	struct kobject * kobj = to_kobj(file->f_path.dentry->d_parent);
-	struct bin_attribute * attr = to_bin_attr(file->f_path.dentry);
-	u8 * buffer = file->private_data;
-
-	kobject_put(kobj);
-	module_put(attr->attr.owner);
-	kfree(buffer);
+	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+	struct bin_buffer *bb = file->private_data;
+
+	if (bb->mmapped)
+		sysfs_put_active_two(attr_sd);
+	sysfs_put(attr_sd);
+	kfree(bb->buffer);
+	kfree(bb);
 	return 0;
 }
 
@@ -181,9 +234,9 @@ const struct file_operations bin_fops = {
 
 int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr)
 {
-	BUG_ON(!kobj || !kobj->dentry || !attr);
+	BUG_ON(!kobj || !kobj->sd || !attr);
 
-	return sysfs_add_file(kobj->dentry, &attr->attr, SYSFS_KOBJ_BIN_ATTR);
+	return sysfs_add_file(kobj->sd, &attr->attr, SYSFS_KOBJ_BIN_ATTR);
 }
 
 
@@ -195,7 +248,7 @@ int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr)
 
 void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr)
 {
-	if (sysfs_hash_and_remove(kobj->dentry, attr->attr.name) < 0) {
+	if (sysfs_hash_and_remove(kobj->sd, attr->attr.name) < 0) {
 		printk(KERN_ERR "%s: "
 			"bad dentry or inode or no such file: \"%s\"\n",
 			__FUNCTION__, attr->attr.name);
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index c4342a019972..aee966c44aac 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -9,21 +9,337 @@
 #include <linux/module.h>
 #include <linux/kobject.h>
 #include <linux/namei.h>
+#include <linux/idr.h>
+#include <linux/completion.h>
 #include <asm/semaphore.h>
 #include "sysfs.h"
 
-DECLARE_RWSEM(sysfs_rename_sem);
-spinlock_t sysfs_lock = SPIN_LOCK_UNLOCKED;
+DEFINE_MUTEX(sysfs_mutex);
+spinlock_t sysfs_assoc_lock = SPIN_LOCK_UNLOCKED;
+
+static spinlock_t sysfs_ino_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_IDA(sysfs_ino_ida);
+
+/**
+ *	sysfs_link_sibling - link sysfs_dirent into sibling list
+ *	@sd: sysfs_dirent of interest
+ *
+ *	Link @sd into its sibling list which starts from
+ *	sd->s_parent->s_children.
+ *
+ *	Locking:
+ *	mutex_lock(sysfs_mutex)
+ */
+void sysfs_link_sibling(struct sysfs_dirent *sd)
+{
+	struct sysfs_dirent *parent_sd = sd->s_parent;
+
+	BUG_ON(sd->s_sibling);
+	sd->s_sibling = parent_sd->s_children;
+	parent_sd->s_children = sd;
+}
+
+/**
+ *	sysfs_unlink_sibling - unlink sysfs_dirent from sibling list
+ *	@sd: sysfs_dirent of interest
+ *
+ *	Unlink @sd from its sibling list which starts from
+ *	sd->s_parent->s_children.
+ *
+ *	Locking:
+ *	mutex_lock(sysfs_mutex)
+ */
+void sysfs_unlink_sibling(struct sysfs_dirent *sd)
+{
+	struct sysfs_dirent **pos;
+
+	for (pos = &sd->s_parent->s_children; *pos; pos = &(*pos)->s_sibling) {
+		if (*pos == sd) {
+			*pos = sd->s_sibling;
+			sd->s_sibling = NULL;
+			break;
+		}
+	}
+}
+
+/**
+ *	sysfs_get_dentry - get dentry for the given sysfs_dirent
+ *	@sd: sysfs_dirent of interest
+ *
+ *	Get dentry for @sd.  Dentry is looked up if currently not
+ *	present.  This function climbs sysfs_dirent tree till it
+ *	reaches a sysfs_dirent with valid dentry attached and descends
+ *	down from there looking up dentry for each step.
+ *
+ *	LOCKING:
+ *	Kernel thread context (may sleep)
+ *
+ *	RETURNS:
+ *	Pointer to found dentry on success, ERR_PTR() value on error.
+ */
+struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
+{
+	struct sysfs_dirent *cur;
+	struct dentry *parent_dentry, *dentry;
+	int i, depth;
+
+	/* Find the first parent which has valid s_dentry and get the
+	 * dentry.
+	 */
+	mutex_lock(&sysfs_mutex);
+ restart0:
+	spin_lock(&sysfs_assoc_lock);
+ restart1:
+	spin_lock(&dcache_lock);
+
+	dentry = NULL;
+	depth = 0;
+	cur = sd;
+	while (!cur->s_dentry || !cur->s_dentry->d_inode) {
+		if (cur->s_flags & SYSFS_FLAG_REMOVED) {
+			dentry = ERR_PTR(-ENOENT);
+			depth = 0;
+			break;
+		}
+		cur = cur->s_parent;
+		depth++;
+	}
+	if (!IS_ERR(dentry))
+		dentry = dget_locked(cur->s_dentry);
+
+	spin_unlock(&dcache_lock);
+	spin_unlock(&sysfs_assoc_lock);
+
+	/* from the found dentry, look up depth times */
+	while (depth--) {
+		/* find and get depth'th ancestor */
+		for (cur = sd, i = 0; cur && i < depth; i++)
+			cur = cur->s_parent;
+
+		/* This can happen if tree structure was modified due
+		 * to move/rename.  Restart.
+		 */
+		if (i != depth) {
+			dput(dentry);
+			goto restart0;
+		}
+
+		sysfs_get(cur);
+
+		mutex_unlock(&sysfs_mutex);
+
+		/* look it up */
+		parent_dentry = dentry;
+		dentry = lookup_one_len_kern(cur->s_name, parent_dentry,
+					     strlen(cur->s_name));
+		dput(parent_dentry);
+
+		if (IS_ERR(dentry)) {
+			sysfs_put(cur);
+			return dentry;
+		}
+
+		mutex_lock(&sysfs_mutex);
+		spin_lock(&sysfs_assoc_lock);
+
+		/* This, again, can happen if tree structure has
+		 * changed and we looked up the wrong thing.  Restart.
+		 */
+		if (cur->s_dentry != dentry) {
+			dput(dentry);
+			sysfs_put(cur);
+			goto restart1;
+		}
+
+		spin_unlock(&sysfs_assoc_lock);
+
+		sysfs_put(cur);
+	}
+
+	mutex_unlock(&sysfs_mutex);
+	return dentry;
+}
+
+/**
+ *	sysfs_get_active - get an active reference to sysfs_dirent
+ *	@sd: sysfs_dirent to get an active reference to
+ *
+ *	Get an active reference of @sd.  This function is noop if @sd
+ *	is NULL.
+ *
+ *	RETURNS:
+ *	Pointer to @sd on success, NULL on failure.
+ */
+struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
+{
+	if (unlikely(!sd))
+		return NULL;
+
+	while (1) {
+		int v, t;
+
+		v = atomic_read(&sd->s_active);
+		if (unlikely(v < 0))
+			return NULL;
+
+		t = atomic_cmpxchg(&sd->s_active, v, v + 1);
+		if (likely(t == v))
+			return sd;
+		if (t < 0)
+			return NULL;
+
+		cpu_relax();
+	}
+}
+
+/**
+ *	sysfs_put_active - put an active reference to sysfs_dirent
+ *	@sd: sysfs_dirent to put an active reference to
+ *
+ *	Put an active reference to @sd.  This function is noop if @sd
+ *	is NULL.
+ */
+void sysfs_put_active(struct sysfs_dirent *sd)
+{
+	struct completion *cmpl;
+	int v;
+
+	if (unlikely(!sd))
+		return;
+
+	v = atomic_dec_return(&sd->s_active);
+	if (likely(v != SD_DEACTIVATED_BIAS))
+		return;
+
+	/* atomic_dec_return() is a mb(), we'll always see the updated
+	 * sd->s_sibling.
+	 */
+	cmpl = (void *)sd->s_sibling;
+	complete(cmpl);
+}
+
+/**
+ *	sysfs_get_active_two - get active references to sysfs_dirent and parent
+ *	@sd: sysfs_dirent of interest
+ *
+ *	Get active reference to @sd and its parent.  Parent's active
+ *	reference is grabbed first.  This function is noop if @sd is
+ *	NULL.
+ *
+ *	RETURNS:
+ *	Pointer to @sd on success, NULL on failure.
+ */
+struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd)
+{
+	if (sd) {
+		if (sd->s_parent && unlikely(!sysfs_get_active(sd->s_parent)))
+			return NULL;
+		if (unlikely(!sysfs_get_active(sd))) {
+			sysfs_put_active(sd->s_parent);
+			return NULL;
+		}
+	}
+	return sd;
+}
+
+/**
+ *	sysfs_put_active_two - put active references to sysfs_dirent and parent
+ *	@sd: sysfs_dirent of interest
+ *
+ *	Put active references to @sd and its parent.  This function is
+ *	noop if @sd is NULL.
+ */
+void sysfs_put_active_two(struct sysfs_dirent *sd)
+{
+	if (sd) {
+		sysfs_put_active(sd);
+		sysfs_put_active(sd->s_parent);
+	}
+}
+
+/**
+ *	sysfs_deactivate - deactivate sysfs_dirent
+ *	@sd: sysfs_dirent to deactivate
+ *
+ *	Deny new active references and drain existing ones.
+ */
+static void sysfs_deactivate(struct sysfs_dirent *sd)
+{
+	DECLARE_COMPLETION_ONSTACK(wait);
+	int v;
+
+	BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED));
+	sd->s_sibling = (void *)&wait;
+
+	/* atomic_add_return() is a mb(), put_active() will always see
+	 * the updated sd->s_sibling.
+	 */
+	v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active);
+
+	if (v != SD_DEACTIVATED_BIAS)
+		wait_for_completion(&wait);
+
+	sd->s_sibling = NULL;
+}
+
+static int sysfs_alloc_ino(ino_t *pino)
+{
+	int ino, rc;
+
+ retry:
+	spin_lock(&sysfs_ino_lock);
+	rc = ida_get_new_above(&sysfs_ino_ida, 2, &ino);
+	spin_unlock(&sysfs_ino_lock);
+
+	if (rc == -EAGAIN) {
+		if (ida_pre_get(&sysfs_ino_ida, GFP_KERNEL))
+			goto retry;
+		rc = -ENOMEM;
+	}
+
+	*pino = ino;
+	return rc;
+}
+
+static void sysfs_free_ino(ino_t ino)
+{
+	spin_lock(&sysfs_ino_lock);
+	ida_remove(&sysfs_ino_ida, ino);
+	spin_unlock(&sysfs_ino_lock);
+}
+
+void release_sysfs_dirent(struct sysfs_dirent * sd)
+{
+	struct sysfs_dirent *parent_sd;
+
+ repeat:
+	/* Moving/renaming is always done while holding reference.
+	 * sd->s_parent won't change beneath us.
+	 */
+	parent_sd = sd->s_parent;
+
+	if (sysfs_type(sd) == SYSFS_KOBJ_LINK)
+		sysfs_put(sd->s_elem.symlink.target_sd);
+	if (sysfs_type(sd) & SYSFS_COPY_NAME)
+		kfree(sd->s_name);
+	kfree(sd->s_iattr);
+	sysfs_free_ino(sd->s_ino);
+	kmem_cache_free(sysfs_dir_cachep, sd);
+
+	sd = parent_sd;
+	if (sd && atomic_dec_and_test(&sd->s_count))
+		goto repeat;
+}
 
 static void sysfs_d_iput(struct dentry * dentry, struct inode * inode)
 {
 	struct sysfs_dirent * sd = dentry->d_fsdata;
 
 	if (sd) {
-		/* sd->s_dentry is protected with sysfs_lock.  This
-		 * allows sysfs_drop_dentry() to dereference it.
+		/* sd->s_dentry is protected with sysfs_assoc_lock.
+		 * This allows sysfs_drop_dentry() to dereference it.
 		 */
-		spin_lock(&sysfs_lock);
+		spin_lock(&sysfs_assoc_lock);
 
 		/* The dentry might have been deleted or another
 		 * lookup could have happened updating sd->s_dentry to
@@ -32,7 +348,7 @@ static void sysfs_d_iput(struct dentry * dentry, struct inode * inode)
 		 */
 		if (sd->s_dentry == dentry)
 			sd->s_dentry = NULL;
-		spin_unlock(&sysfs_lock);
+		spin_unlock(&sysfs_assoc_lock);
 		sysfs_put(sd);
 	}
 	iput(inode);
@@ -42,260 +358,402 @@ static struct dentry_operations sysfs_dentry_ops = {
 	.d_iput		= sysfs_d_iput,
 };
 
-static unsigned int sysfs_inode_counter;
-ino_t sysfs_get_inum(void)
+struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
 {
-	if (unlikely(sysfs_inode_counter < 3))
-		sysfs_inode_counter = 3;
-	return sysfs_inode_counter++;
-}
+	char *dup_name = NULL;
+	struct sysfs_dirent *sd = NULL;
 
-/*
- * Allocates a new sysfs_dirent and links it to the parent sysfs_dirent
- */
-static struct sysfs_dirent * __sysfs_new_dirent(void * element)
-{
-	struct sysfs_dirent * sd;
+	if (type & SYSFS_COPY_NAME) {
+		name = dup_name = kstrdup(name, GFP_KERNEL);
+		if (!name)
+			goto err_out;
+	}
 
 	sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL);
 	if (!sd)
-		return NULL;
+		goto err_out;
+
+	if (sysfs_alloc_ino(&sd->s_ino))
+		goto err_out;
 
-	sd->s_ino = sysfs_get_inum();
 	atomic_set(&sd->s_count, 1);
+	atomic_set(&sd->s_active, 0);
 	atomic_set(&sd->s_event, 1);
-	INIT_LIST_HEAD(&sd->s_children);
-	INIT_LIST_HEAD(&sd->s_sibling);
-	sd->s_element = element;
+
+	sd->s_name = name;
+	sd->s_mode = mode;
+	sd->s_flags = type;
 
 	return sd;
+
+ err_out:
+	kfree(dup_name);
+	kmem_cache_free(sysfs_dir_cachep, sd);
+	return NULL;
 }
 
-static void __sysfs_list_dirent(struct sysfs_dirent *parent_sd,
-			      struct sysfs_dirent *sd)
+/**
+ *	sysfs_attach_dentry - associate sysfs_dirent with dentry
+ *	@sd: target sysfs_dirent
+ *	@dentry: dentry to associate
+ *
+ *	Associate @sd with @dentry.  This is protected by
+ *	sysfs_assoc_lock to avoid race with sysfs_d_iput().
+ *
+ *	LOCKING:
+ *	mutex_lock(sysfs_mutex)
+ */
+static void sysfs_attach_dentry(struct sysfs_dirent *sd, struct dentry *dentry)
 {
-	if (sd)
-		list_add(&sd->s_sibling, &parent_sd->s_children);
+	dentry->d_op = &sysfs_dentry_ops;
+	dentry->d_fsdata = sysfs_get(sd);
+
+	/* protect sd->s_dentry against sysfs_d_iput */
+	spin_lock(&sysfs_assoc_lock);
+	sd->s_dentry = dentry;
+	spin_unlock(&sysfs_assoc_lock);
+
+	d_rehash(dentry);
 }
 
-static struct sysfs_dirent * sysfs_new_dirent(struct sysfs_dirent *parent_sd,
-						void * element)
+static int sysfs_ilookup_test(struct inode *inode, void *arg)
 {
-	struct sysfs_dirent *sd;
-	sd = __sysfs_new_dirent(element);
-	__sysfs_list_dirent(parent_sd, sd);
-	return sd;
+	struct sysfs_dirent *sd = arg;
+	return inode->i_ino == sd->s_ino;
 }
 
-/*
+/**
+ *	sysfs_addrm_start - prepare for sysfs_dirent add/remove
+ *	@acxt: pointer to sysfs_addrm_cxt to be used
+ *	@parent_sd: parent sysfs_dirent
  *
- * Return -EEXIST if there is already a sysfs element with the same name for
- * the same parent.
+ *	This function is called when the caller is about to add or
+ *	remove sysfs_dirent under @parent_sd.  This function acquires
+ *	sysfs_mutex, grabs inode for @parent_sd if available and lock
+ *	i_mutex of it.  @acxt is used to keep and pass context to
+ *	other addrm functions.
  *
- * called with parent inode's i_mutex held
+ *	LOCKING:
+ *	Kernel thread context (may sleep).  sysfs_mutex is locked on
+ *	return.  i_mutex of parent inode is locked on return if
+ *	available.
  */
-int sysfs_dirent_exist(struct sysfs_dirent *parent_sd,
-			  const unsigned char *new)
+void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
+		       struct sysfs_dirent *parent_sd)
 {
-	struct sysfs_dirent * sd;
+	struct inode *inode;
 
-	list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
-		if (sd->s_element) {
-			const unsigned char *existing = sysfs_get_name(sd);
-			if (strcmp(existing, new))
-				continue;
-			else
-				return -EEXIST;
-		}
-	}
+	memset(acxt, 0, sizeof(*acxt));
+	acxt->parent_sd = parent_sd;
 
-	return 0;
+	/* Lookup parent inode.  inode initialization and I_NEW
+	 * clearing are protected by sysfs_mutex.  By grabbing it and
+	 * looking up with _nowait variant, inode state can be
+	 * determined reliably.
+	 */
+	mutex_lock(&sysfs_mutex);
+
+	inode = ilookup5_nowait(sysfs_sb, parent_sd->s_ino, sysfs_ilookup_test,
+				parent_sd);
+
+	if (inode && !(inode->i_state & I_NEW)) {
+		/* parent inode available */
+		acxt->parent_inode = inode;
+
+		/* sysfs_mutex is below i_mutex in lock hierarchy.
+		 * First, trylock i_mutex.  If fails, unlock
+		 * sysfs_mutex and lock them in order.
+		 */
+		if (!mutex_trylock(&inode->i_mutex)) {
+			mutex_unlock(&sysfs_mutex);
+			mutex_lock(&inode->i_mutex);
+			mutex_lock(&sysfs_mutex);
+		}
+	} else
+		iput(inode);
 }
 
+/**
+ *	sysfs_add_one - add sysfs_dirent to parent
+ *	@acxt: addrm context to use
+ *	@sd: sysfs_dirent to be added
+ *
+ *	Get @acxt->parent_sd and set sd->s_parent to it and increment
+ *	nlink of parent inode if @sd is a directory.  @sd is NOT
+ *	linked into the children list of the parent.  The caller
+ *	should invoke sysfs_link_sibling() after this function
+ *	completes if @sd needs to be on the children list.
+ *
+ *	This function should be called between calls to
+ *	sysfs_addrm_start() and sysfs_addrm_finish() and should be
+ *	passed the same @acxt as passed to sysfs_addrm_start().
+ *
+ *	LOCKING:
+ *	Determined by sysfs_addrm_start().
+ */
+void sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
+{
+	sd->s_parent = sysfs_get(acxt->parent_sd);
+
+	if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
+		inc_nlink(acxt->parent_inode);
+
+	acxt->cnt++;
+}
 
-static struct sysfs_dirent *
-__sysfs_make_dirent(struct dentry *dentry, void *element, mode_t mode, int type)
+/**
+ *	sysfs_remove_one - remove sysfs_dirent from parent
+ *	@acxt: addrm context to use
+ *	@sd: sysfs_dirent to be added
+ *
+ *	Mark @sd removed and drop nlink of parent inode if @sd is a
+ *	directory.  @sd is NOT unlinked from the children list of the
+ *	parent.  The caller is repsonsible for removing @sd from the
+ *	children list before calling this function.
+ *
+ *	This function should be called between calls to
+ *	sysfs_addrm_start() and sysfs_addrm_finish() and should be
+ *	passed the same @acxt as passed to sysfs_addrm_start().
+ *
+ *	LOCKING:
+ *	Determined by sysfs_addrm_start().
+ */
+void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
 {
-	struct sysfs_dirent * sd;
+	BUG_ON(sd->s_sibling || (sd->s_flags & SYSFS_FLAG_REMOVED));
 
-	sd = __sysfs_new_dirent(element);
-	if (!sd)
-		goto out;
+	sd->s_flags |= SYSFS_FLAG_REMOVED;
+	sd->s_sibling = acxt->removed;
+	acxt->removed = sd;
 
-	sd->s_mode = mode;
-	sd->s_type = type;
-	sd->s_dentry = dentry;
-	if (dentry) {
-		dentry->d_fsdata = sysfs_get(sd);
-		dentry->d_op = &sysfs_dentry_ops;
-	}
+	if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
+		drop_nlink(acxt->parent_inode);
 
-out:
-	return sd;
+	acxt->cnt++;
 }
 
-int sysfs_make_dirent(struct sysfs_dirent * parent_sd, struct dentry * dentry,
-			void * element, umode_t mode, int type)
+/**
+ *	sysfs_drop_dentry - drop dentry for the specified sysfs_dirent
+ *	@sd: target sysfs_dirent
+ *
+ *	Drop dentry for @sd.  @sd must have been unlinked from its
+ *	parent on entry to this function such that it can't be looked
+ *	up anymore.
+ *
+ *	@sd->s_dentry which is protected with sysfs_assoc_lock points
+ *	to the currently associated dentry but we're not holding a
+ *	reference to it and racing with dput().  Grab dcache_lock and
+ *	verify dentry before dropping it.  If @sd->s_dentry is NULL or
+ *	dput() beats us, no need to bother.
+ */
+static void sysfs_drop_dentry(struct sysfs_dirent *sd)
 {
-	struct sysfs_dirent *sd;
+	struct dentry *dentry = NULL;
+	struct inode *inode;
+
+	/* We're not holding a reference to ->s_dentry dentry but the
+	 * field will stay valid as long as sysfs_assoc_lock is held.
+	 */
+	spin_lock(&sysfs_assoc_lock);
+	spin_lock(&dcache_lock);
+
+	/* drop dentry if it's there and dput() didn't kill it yet */
+	if (sd->s_dentry && sd->s_dentry->d_inode) {
+		dentry = dget_locked(sd->s_dentry);
+		spin_lock(&dentry->d_lock);
+		__d_drop(dentry);
+		spin_unlock(&dentry->d_lock);
+	}
 
-	sd = __sysfs_make_dirent(dentry, element, mode, type);
-	__sysfs_list_dirent(parent_sd, sd);
+	spin_unlock(&dcache_lock);
+	spin_unlock(&sysfs_assoc_lock);
 
-	return sd ? 0 : -ENOMEM;
+	/* dentries for shadowed inodes are pinned, unpin */
+	if (dentry && sysfs_is_shadowed_inode(dentry->d_inode))
+		dput(dentry);
+	dput(dentry);
+
+	/* adjust nlink and update timestamp */
+	inode = ilookup(sysfs_sb, sd->s_ino);
+	if (inode) {
+		mutex_lock(&inode->i_mutex);
+
+		inode->i_ctime = CURRENT_TIME;
+		drop_nlink(inode);
+		if (sysfs_type(sd) == SYSFS_DIR)
+			drop_nlink(inode);
+
+		mutex_unlock(&inode->i_mutex);
+		iput(inode);
+	}
 }
 
-static int init_dir(struct inode * inode)
+/**
+ *	sysfs_addrm_finish - finish up sysfs_dirent add/remove
+ *	@acxt: addrm context to finish up
+ *
+ *	Finish up sysfs_dirent add/remove.  Resources acquired by
+ *	sysfs_addrm_start() are released and removed sysfs_dirents are
+ *	cleaned up.  Timestamps on the parent inode are updated.
+ *
+ *	LOCKING:
+ *	All mutexes acquired by sysfs_addrm_start() are released.
+ *
+ *	RETURNS:
+ *	Number of added/removed sysfs_dirents since sysfs_addrm_start().
+ */
+int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
 {
-	inode->i_op = &sysfs_dir_inode_operations;
-	inode->i_fop = &sysfs_dir_operations;
+	/* release resources acquired by sysfs_addrm_start() */
+	mutex_unlock(&sysfs_mutex);
+	if (acxt->parent_inode) {
+		struct inode *inode = acxt->parent_inode;
 
-	/* directory inodes start off with i_nlink == 2 (for "." entry) */
-	inc_nlink(inode);
-	return 0;
+		/* if added/removed, update timestamps on the parent */
+		if (acxt->cnt)
+			inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+
+		mutex_unlock(&inode->i_mutex);
+		iput(inode);
+	}
+
+	/* kill removed sysfs_dirents */
+	while (acxt->removed) {
+		struct sysfs_dirent *sd = acxt->removed;
+
+		acxt->removed = sd->s_sibling;
+		sd->s_sibling = NULL;
+
+		sysfs_drop_dentry(sd);
+		sysfs_deactivate(sd);
+		sysfs_put(sd);
+	}
+
+	return acxt->cnt;
 }
 
-static int init_file(struct inode * inode)
+/**
+ *	sysfs_find_dirent - find sysfs_dirent with the given name
+ *	@parent_sd: sysfs_dirent to search under
+ *	@name: name to look for
+ *
+ *	Look for sysfs_dirent with name @name under @parent_sd.
+ *
+ *	LOCKING:
+ *	mutex_lock(sysfs_mutex)
+ *
+ *	RETURNS:
+ *	Pointer to sysfs_dirent if found, NULL if not.
+ */
+struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
+				       const unsigned char *name)
 {
-	inode->i_size = PAGE_SIZE;
-	inode->i_fop = &sysfs_file_operations;
-	return 0;
+	struct sysfs_dirent *sd;
+
+	for (sd = parent_sd->s_children; sd; sd = sd->s_sibling)
+		if (sysfs_type(sd) && !strcmp(sd->s_name, name))
+			return sd;
+	return NULL;
 }
 
-static int init_symlink(struct inode * inode)
+/**
+ *	sysfs_get_dirent - find and get sysfs_dirent with the given name
+ *	@parent_sd: sysfs_dirent to search under
+ *	@name: name to look for
+ *
+ *	Look for sysfs_dirent with name @name under @parent_sd and get
+ *	it if found.
+ *
+ *	LOCKING:
+ *	Kernel thread context (may sleep).  Grabs sysfs_mutex.
+ *
+ *	RETURNS:
+ *	Pointer to sysfs_dirent if found, NULL if not.
+ */
+struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
+				      const unsigned char *name)
 {
-	inode->i_op = &sysfs_symlink_inode_operations;
-	return 0;
+	struct sysfs_dirent *sd;
+
+	mutex_lock(&sysfs_mutex);
+	sd = sysfs_find_dirent(parent_sd, name);
+	sysfs_get(sd);
+	mutex_unlock(&sysfs_mutex);
+
+	return sd;
 }
 
-static int create_dir(struct kobject * k, struct dentry * p,
-		      const char * n, struct dentry ** d)
+static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
+		      const char *name, struct sysfs_dirent **p_sd)
 {
-	int error;
 	umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
+	struct sysfs_addrm_cxt acxt;
+	struct sysfs_dirent *sd;
 
-	mutex_lock(&p->d_inode->i_mutex);
-	*d = lookup_one_len(n, p, strlen(n));
-	if (!IS_ERR(*d)) {
- 		if (sysfs_dirent_exist(p->d_fsdata, n))
-  			error = -EEXIST;
-  		else
-			error = sysfs_make_dirent(p->d_fsdata, *d, k, mode,
-								SYSFS_DIR);
-		if (!error) {
-			error = sysfs_create(*d, mode, init_dir);
-			if (!error) {
-				inc_nlink(p->d_inode);
-				(*d)->d_op = &sysfs_dentry_ops;
-				d_rehash(*d);
-			}
-		}
-		if (error && (error != -EEXIST)) {
-			struct sysfs_dirent *sd = (*d)->d_fsdata;
-			if (sd) {
- 				list_del_init(&sd->s_sibling);
-				sysfs_put(sd);
-			}
-			d_drop(*d);
-		}
-		dput(*d);
-	} else
-		error = PTR_ERR(*d);
-	mutex_unlock(&p->d_inode->i_mutex);
-	return error;
-}
+	/* allocate */
+	sd = sysfs_new_dirent(name, mode, SYSFS_DIR);
+	if (!sd)
+		return -ENOMEM;
+	sd->s_elem.dir.kobj = kobj;
 
+	/* link in */
+	sysfs_addrm_start(&acxt, parent_sd);
+	if (!sysfs_find_dirent(parent_sd, name)) {
+		sysfs_add_one(&acxt, sd);
+		sysfs_link_sibling(sd);
+	}
+	if (sysfs_addrm_finish(&acxt)) {
+		*p_sd = sd;
+		return 0;
+	}
 
-int sysfs_create_subdir(struct kobject * k, const char * n, struct dentry ** d)
+	sysfs_put(sd);
+	return -EEXIST;
+}
+
+int sysfs_create_subdir(struct kobject *kobj, const char *name,
+			struct sysfs_dirent **p_sd)
 {
-	return create_dir(k,k->dentry,n,d);
+	return create_dir(kobj, kobj->sd, name, p_sd);
 }
 
 /**
  *	sysfs_create_dir - create a directory for an object.
  *	@kobj:		object we're creating directory for. 
- *	@shadow_parent:	parent parent object.
+ *	@shadow_parent:	parent object.
  */
-
-int sysfs_create_dir(struct kobject * kobj, struct dentry *shadow_parent)
+int sysfs_create_dir(struct kobject *kobj,
+		     struct sysfs_dirent *shadow_parent_sd)
 {
-	struct dentry * dentry = NULL;
-	struct dentry * parent;
+	struct sysfs_dirent *parent_sd, *sd;
 	int error = 0;
 
 	BUG_ON(!kobj);
 
-	if (shadow_parent)
-		parent = shadow_parent;
+	if (shadow_parent_sd)
+		parent_sd = shadow_parent_sd;
 	else if (kobj->parent)
-		parent = kobj->parent->dentry;
+		parent_sd = kobj->parent->sd;
 	else if (sysfs_mount && sysfs_mount->mnt_sb)
-		parent = sysfs_mount->mnt_sb->s_root;
+		parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata;
 	else
 		return -EFAULT;
 
-	error = create_dir(kobj,parent,kobject_name(kobj),&dentry);
+	error = create_dir(kobj, parent_sd, kobject_name(kobj), &sd);
 	if (!error)
-		kobj->dentry = dentry;
+		kobj->sd = sd;
 	return error;
 }
 
-/* attaches attribute's sysfs_dirent to the dentry corresponding to the
- * attribute file
- */
-static int sysfs_attach_attr(struct sysfs_dirent * sd, struct dentry * dentry)
+static int sysfs_count_nlink(struct sysfs_dirent *sd)
 {
-	struct attribute * attr = NULL;
-	struct bin_attribute * bin_attr = NULL;
-	int (* init) (struct inode *) = NULL;
-	int error = 0;
-
-        if (sd->s_type & SYSFS_KOBJ_BIN_ATTR) {
-                bin_attr = sd->s_element;
-                attr = &bin_attr->attr;
-        } else {
-                attr = sd->s_element;
-                init = init_file;
-        }
+	struct sysfs_dirent *child;
+	int nr = 0;
 
-	dentry->d_fsdata = sysfs_get(sd);
-	/* protect sd->s_dentry against sysfs_d_iput */
-	spin_lock(&sysfs_lock);
-	sd->s_dentry = dentry;
-	spin_unlock(&sysfs_lock);
-	error = sysfs_create(dentry, (attr->mode & S_IALLUGO) | S_IFREG, init);
-	if (error) {
-		sysfs_put(sd);
-		return error;
-	}
-
-        if (bin_attr) {
-		dentry->d_inode->i_size = bin_attr->size;
-		dentry->d_inode->i_fop = &bin_fops;
-	}
-	dentry->d_op = &sysfs_dentry_ops;
-	d_rehash(dentry);
-
-	return 0;
-}
-
-static int sysfs_attach_link(struct sysfs_dirent * sd, struct dentry * dentry)
-{
-	int err = 0;
-
-	dentry->d_fsdata = sysfs_get(sd);
-	/* protect sd->s_dentry against sysfs_d_iput */
-	spin_lock(&sysfs_lock);
-	sd->s_dentry = dentry;
-	spin_unlock(&sysfs_lock);
-	err = sysfs_create(dentry, S_IFLNK|S_IRWXUGO, init_symlink);
-	if (!err) {
-		dentry->d_op = &sysfs_dentry_ops;
-		d_rehash(dentry);
-	} else
-		sysfs_put(sd);
-
-	return err;
+	for (child = sd->s_children; child; child = child->s_sibling)
+		if (sysfs_type(child) == SYSFS_DIR)
+			nr++;
+	return nr + 2;
 }
 
 static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
@@ -303,24 +761,60 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
 {
 	struct sysfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
 	struct sysfs_dirent * sd;
-	int err = 0;
+	struct bin_attribute *bin_attr;
+	struct inode *inode;
+	int found = 0;
 
-	list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
-		if (sd->s_type & SYSFS_NOT_PINNED) {
-			const unsigned char * name = sysfs_get_name(sd);
+	for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) {
+		if (sysfs_type(sd) &&
+		    !strcmp(sd->s_name, dentry->d_name.name)) {
+			found = 1;
+			break;
+		}
+	}
 
-			if (strcmp(name, dentry->d_name.name))
-				continue;
+	/* no such entry */
+	if (!found)
+		return NULL;
 
-			if (sd->s_type & SYSFS_KOBJ_LINK)
-				err = sysfs_attach_link(sd, dentry);
-			else
-				err = sysfs_attach_attr(sd, dentry);
+	/* attach dentry and inode */
+	inode = sysfs_get_inode(sd);
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_lock(&sysfs_mutex);
+
+	if (inode->i_state & I_NEW) {
+		/* initialize inode according to type */
+		switch (sysfs_type(sd)) {
+		case SYSFS_DIR:
+			inode->i_op = &sysfs_dir_inode_operations;
+			inode->i_fop = &sysfs_dir_operations;
+			inode->i_nlink = sysfs_count_nlink(sd);
+			break;
+		case SYSFS_KOBJ_ATTR:
+			inode->i_size = PAGE_SIZE;
+			inode->i_fop = &sysfs_file_operations;
+			break;
+		case SYSFS_KOBJ_BIN_ATTR:
+			bin_attr = sd->s_elem.bin_attr.bin_attr;
+			inode->i_size = bin_attr->size;
+			inode->i_fop = &bin_fops;
 			break;
+		case SYSFS_KOBJ_LINK:
+			inode->i_op = &sysfs_symlink_inode_operations;
+			break;
+		default:
+			BUG();
 		}
 	}
 
-	return ERR_PTR(err);
+	sysfs_instantiate(dentry, inode);
+	sysfs_attach_dentry(sd, dentry);
+
+	mutex_unlock(&sysfs_mutex);
+
+	return NULL;
 }
 
 const struct inode_operations sysfs_dir_inode_operations = {
@@ -328,58 +822,46 @@ const struct inode_operations sysfs_dir_inode_operations = {
 	.setattr	= sysfs_setattr,
 };
 
-static void remove_dir(struct dentry * d)
+static void remove_dir(struct sysfs_dirent *sd)
 {
-	struct dentry * parent = dget(d->d_parent);
-	struct sysfs_dirent * sd;
-
-	mutex_lock(&parent->d_inode->i_mutex);
-	d_delete(d);
-	sd = d->d_fsdata;
- 	list_del_init(&sd->s_sibling);
-	sysfs_put(sd);
-	if (d->d_inode)
-		simple_rmdir(parent->d_inode,d);
-
-	pr_debug(" o %s removing done (%d)\n",d->d_name.name,
-		 atomic_read(&d->d_count));
+	struct sysfs_addrm_cxt acxt;
 
-	mutex_unlock(&parent->d_inode->i_mutex);
-	dput(parent);
+	sysfs_addrm_start(&acxt, sd->s_parent);
+	sysfs_unlink_sibling(sd);
+	sysfs_remove_one(&acxt, sd);
+	sysfs_addrm_finish(&acxt);
 }
 
-void sysfs_remove_subdir(struct dentry * d)
+void sysfs_remove_subdir(struct sysfs_dirent *sd)
 {
-	remove_dir(d);
+	remove_dir(sd);
 }
 
 
-static void __sysfs_remove_dir(struct dentry *dentry)
+static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd)
 {
-	struct sysfs_dirent * parent_sd;
-	struct sysfs_dirent * sd, * tmp;
+	struct sysfs_addrm_cxt acxt;
+	struct sysfs_dirent **pos;
 
-	dget(dentry);
-	if (!dentry)
+	if (!dir_sd)
 		return;
 
-	pr_debug("sysfs %s: removing dir\n",dentry->d_name.name);
-	mutex_lock(&dentry->d_inode->i_mutex);
-	parent_sd = dentry->d_fsdata;
-	list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) {
-		if (!sd->s_element || !(sd->s_type & SYSFS_NOT_PINNED))
-			continue;
-		list_del_init(&sd->s_sibling);
-		sysfs_drop_dentry(sd, dentry);
-		sysfs_put(sd);
+	pr_debug("sysfs %s: removing dir\n", dir_sd->s_name);
+	sysfs_addrm_start(&acxt, dir_sd);
+	pos = &dir_sd->s_children;
+	while (*pos) {
+		struct sysfs_dirent *sd = *pos;
+
+		if (sysfs_type(sd) && sysfs_type(sd) != SYSFS_DIR) {
+			*pos = sd->s_sibling;
+			sd->s_sibling = NULL;
+			sysfs_remove_one(&acxt, sd);
+		} else
+			pos = &(*pos)->s_sibling;
 	}
-	mutex_unlock(&dentry->d_inode->i_mutex);
+	sysfs_addrm_finish(&acxt);
 
-	remove_dir(dentry);
-	/**
-	 * Drop reference from dget() on entrance.
-	 */
-	dput(dentry);
+	remove_dir(dir_sd);
 }
 
 /**
@@ -393,102 +875,166 @@ static void __sysfs_remove_dir(struct dentry *dentry)
 
 void sysfs_remove_dir(struct kobject * kobj)
 {
-	__sysfs_remove_dir(kobj->dentry);
-	kobj->dentry = NULL;
+	struct sysfs_dirent *sd = kobj->sd;
+
+	spin_lock(&sysfs_assoc_lock);
+	kobj->sd = NULL;
+	spin_unlock(&sysfs_assoc_lock);
+
+	__sysfs_remove_dir(sd);
 }
 
-int sysfs_rename_dir(struct kobject * kobj, struct dentry *new_parent,
+int sysfs_rename_dir(struct kobject *kobj, struct sysfs_dirent *new_parent_sd,
 		     const char *new_name)
 {
-	int error = 0;
-	struct dentry * new_dentry;
+	struct sysfs_dirent *sd = kobj->sd;
+	struct dentry *new_parent = NULL;
+	struct dentry *old_dentry = NULL, *new_dentry = NULL;
+	const char *dup_name = NULL;
+	int error;
 
-	if (!new_parent)
-		return -EFAULT;
+	/* get dentries */
+	old_dentry = sysfs_get_dentry(sd);
+	if (IS_ERR(old_dentry)) {
+		error = PTR_ERR(old_dentry);
+		goto out_dput;
+	}
 
-	down_write(&sysfs_rename_sem);
+	new_parent = sysfs_get_dentry(new_parent_sd);
+	if (IS_ERR(new_parent)) {
+		error = PTR_ERR(new_parent);
+		goto out_dput;
+	}
+
+	/* lock new_parent and get dentry for new name */
 	mutex_lock(&new_parent->d_inode->i_mutex);
 
 	new_dentry = lookup_one_len(new_name, new_parent, strlen(new_name));
-	if (!IS_ERR(new_dentry)) {
-		/* By allowing two different directories with the
-		 * same d_parent we allow this routine to move
-		 * between different shadows of the same directory
-		 */
-		if (kobj->dentry->d_parent->d_inode != new_parent->d_inode)
-			return -EINVAL;
-		else if (new_dentry->d_parent->d_inode != new_parent->d_inode)
-			error = -EINVAL;
-		else if (new_dentry == kobj->dentry)
-			error = -EINVAL;
-		else if (!new_dentry->d_inode) {
-			error = kobject_set_name(kobj, "%s", new_name);
-			if (!error) {
-				struct sysfs_dirent *sd, *parent_sd;
-
-				d_add(new_dentry, NULL);
-				d_move(kobj->dentry, new_dentry);
-
-				sd = kobj->dentry->d_fsdata;
-				parent_sd = new_parent->d_fsdata;
-
-				list_del_init(&sd->s_sibling);
-				list_add(&sd->s_sibling, &parent_sd->s_children);
-			}
-			else
-				d_drop(new_dentry);
-		} else
-			error = -EEXIST;
-		dput(new_dentry);
+	if (IS_ERR(new_dentry)) {
+		error = PTR_ERR(new_dentry);
+		goto out_unlock;
 	}
-	mutex_unlock(&new_parent->d_inode->i_mutex);
-	up_write(&sysfs_rename_sem);
 
+	/* By allowing two different directories with the same
+	 * d_parent we allow this routine to move between different
+	 * shadows of the same directory
+	 */
+	error = -EINVAL;
+	if (old_dentry->d_parent->d_inode != new_parent->d_inode ||
+	    new_dentry->d_parent->d_inode != new_parent->d_inode ||
+	    old_dentry == new_dentry)
+		goto out_unlock;
+
+	error = -EEXIST;
+	if (new_dentry->d_inode)
+		goto out_unlock;
+
+	/* rename kobject and sysfs_dirent */
+	error = -ENOMEM;
+	new_name = dup_name = kstrdup(new_name, GFP_KERNEL);
+	if (!new_name)
+		goto out_drop;
+
+	error = kobject_set_name(kobj, "%s", new_name);
+	if (error)
+		goto out_drop;
+
+	dup_name = sd->s_name;
+	sd->s_name = new_name;
+
+	/* move under the new parent */
+	d_add(new_dentry, NULL);
+	d_move(sd->s_dentry, new_dentry);
+
+	mutex_lock(&sysfs_mutex);
+
+	sysfs_unlink_sibling(sd);
+	sysfs_get(new_parent_sd);
+	sysfs_put(sd->s_parent);
+	sd->s_parent = new_parent_sd;
+	sysfs_link_sibling(sd);
+
+	mutex_unlock(&sysfs_mutex);
+
+	error = 0;
+	goto out_unlock;
+
+ out_drop:
+	d_drop(new_dentry);
+ out_unlock:
+	mutex_unlock(&new_parent->d_inode->i_mutex);
+ out_dput:
+	kfree(dup_name);
+	dput(new_parent);
+	dput(old_dentry);
+	dput(new_dentry);
 	return error;
 }
 
-int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent)
+int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
 {
-	struct dentry *old_parent_dentry, *new_parent_dentry, *new_dentry;
-	struct sysfs_dirent *new_parent_sd, *sd;
+	struct sysfs_dirent *sd = kobj->sd;
+	struct sysfs_dirent *new_parent_sd;
+	struct dentry *old_parent, *new_parent = NULL;
+	struct dentry *old_dentry = NULL, *new_dentry = NULL;
 	int error;
 
-	old_parent_dentry = kobj->parent ?
-		kobj->parent->dentry : sysfs_mount->mnt_sb->s_root;
-	new_parent_dentry = new_parent ?
-		new_parent->dentry : sysfs_mount->mnt_sb->s_root;
+	BUG_ON(!sd->s_parent);
+	new_parent_sd = new_parent_kobj->sd ? new_parent_kobj->sd : &sysfs_root;
+
+	/* get dentries */
+	old_dentry = sysfs_get_dentry(sd);
+	if (IS_ERR(old_dentry)) {
+		error = PTR_ERR(old_dentry);
+		goto out_dput;
+	}
+	old_parent = sd->s_parent->s_dentry;
+
+	new_parent = sysfs_get_dentry(new_parent_sd);
+	if (IS_ERR(new_parent)) {
+		error = PTR_ERR(new_parent);
+		goto out_dput;
+	}
 
-	if (old_parent_dentry->d_inode == new_parent_dentry->d_inode)
-		return 0;	/* nothing to move */
+	if (old_parent->d_inode == new_parent->d_inode) {
+		error = 0;
+		goto out_dput;	/* nothing to move */
+	}
 again:
-	mutex_lock(&old_parent_dentry->d_inode->i_mutex);
-	if (!mutex_trylock(&new_parent_dentry->d_inode->i_mutex)) {
-		mutex_unlock(&old_parent_dentry->d_inode->i_mutex);
+	mutex_lock(&old_parent->d_inode->i_mutex);
+	if (!mutex_trylock(&new_parent->d_inode->i_mutex)) {
+		mutex_unlock(&old_parent->d_inode->i_mutex);
 		goto again;
 	}
 
-	new_parent_sd = new_parent_dentry->d_fsdata;
-	sd = kobj->dentry->d_fsdata;
-
-	new_dentry = lookup_one_len(kobj->name, new_parent_dentry,
-				    strlen(kobj->name));
+	new_dentry = lookup_one_len(kobj->name, new_parent, strlen(kobj->name));
 	if (IS_ERR(new_dentry)) {
 		error = PTR_ERR(new_dentry);
-		goto out;
+		goto out_unlock;
 	} else
 		error = 0;
 	d_add(new_dentry, NULL);
-	d_move(kobj->dentry, new_dentry);
+	d_move(sd->s_dentry, new_dentry);
 	dput(new_dentry);
 
 	/* Remove from old parent's list and insert into new parent's list. */
-	list_del_init(&sd->s_sibling);
-	list_add(&sd->s_sibling, &new_parent_sd->s_children);
+	mutex_lock(&sysfs_mutex);
+
+	sysfs_unlink_sibling(sd);
+	sysfs_get(new_parent_sd);
+	sysfs_put(sd->s_parent);
+	sd->s_parent = new_parent_sd;
+	sysfs_link_sibling(sd);
 
-out:
-	mutex_unlock(&new_parent_dentry->d_inode->i_mutex);
-	mutex_unlock(&old_parent_dentry->d_inode->i_mutex);
+	mutex_unlock(&sysfs_mutex);
 
+ out_unlock:
+	mutex_unlock(&new_parent->d_inode->i_mutex);
+	mutex_unlock(&old_parent->d_inode->i_mutex);
+ out_dput:
+	dput(new_parent);
+	dput(old_dentry);
+	dput(new_dentry);
 	return error;
 }
 
@@ -496,23 +1042,27 @@ static int sysfs_dir_open(struct inode *inode, struct file *file)
 {
 	struct dentry * dentry = file->f_path.dentry;
 	struct sysfs_dirent * parent_sd = dentry->d_fsdata;
+	struct sysfs_dirent * sd;
 
-	mutex_lock(&dentry->d_inode->i_mutex);
-	file->private_data = sysfs_new_dirent(parent_sd, NULL);
-	mutex_unlock(&dentry->d_inode->i_mutex);
-
-	return file->private_data ? 0 : -ENOMEM;
+	sd = sysfs_new_dirent("_DIR_", 0, 0);
+	if (sd) {
+		mutex_lock(&sysfs_mutex);
+		sd->s_parent = sysfs_get(parent_sd);
+		sysfs_link_sibling(sd);
+		mutex_unlock(&sysfs_mutex);
+	}
 
+	file->private_data = sd;
+	return sd ? 0 : -ENOMEM;
 }
 
 static int sysfs_dir_close(struct inode *inode, struct file *file)
 {
-	struct dentry * dentry = file->f_path.dentry;
 	struct sysfs_dirent * cursor = file->private_data;
 
-	mutex_lock(&dentry->d_inode->i_mutex);
-	list_del_init(&cursor->s_sibling);
-	mutex_unlock(&dentry->d_inode->i_mutex);
+	mutex_lock(&sysfs_mutex);
+	sysfs_unlink_sibling(cursor);
+	mutex_unlock(&sysfs_mutex);
 
 	release_sysfs_dirent(cursor);
 
@@ -530,7 +1080,7 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
 	struct dentry *dentry = filp->f_path.dentry;
 	struct sysfs_dirent * parent_sd = dentry->d_fsdata;
 	struct sysfs_dirent *cursor = filp->private_data;
-	struct list_head *p, *q = &cursor->s_sibling;
+	struct sysfs_dirent **pos;
 	ino_t ino;
 	int i = filp->f_pos;
 
@@ -543,38 +1093,52 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
 			i++;
 			/* fallthrough */
 		case 1:
-			ino = parent_ino(dentry);
+			if (parent_sd->s_parent)
+				ino = parent_sd->s_parent->s_ino;
+			else
+				ino = parent_sd->s_ino;
 			if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
 				break;
 			filp->f_pos++;
 			i++;
 			/* fallthrough */
 		default:
+			mutex_lock(&sysfs_mutex);
+
+			pos = &parent_sd->s_children;
+			while (*pos != cursor)
+				pos = &(*pos)->s_sibling;
+
+			/* unlink cursor */
+			*pos = cursor->s_sibling;
+
 			if (filp->f_pos == 2)
-				list_move(q, &parent_sd->s_children);
+				pos = &parent_sd->s_children;
 
-			for (p=q->next; p!= &parent_sd->s_children; p=p->next) {
-				struct sysfs_dirent *next;
+			for ( ; *pos; pos = &(*pos)->s_sibling) {
+				struct sysfs_dirent *next = *pos;
 				const char * name;
 				int len;
 
-				next = list_entry(p, struct sysfs_dirent,
-						   s_sibling);
-				if (!next->s_element)
+				if (!sysfs_type(next))
 					continue;
 
-				name = sysfs_get_name(next);
+				name = next->s_name;
 				len = strlen(name);
 				ino = next->s_ino;
 
 				if (filldir(dirent, name, len, filp->f_pos, ino,
 						 dt_type(next)) < 0)
-					return 0;
+					break;
 
-				list_move(q, p);
-				p = q;
 				filp->f_pos++;
 			}
+
+			/* put cursor back in */
+			cursor->s_sibling = *pos;
+			*pos = cursor;
+
+			mutex_unlock(&sysfs_mutex);
 	}
 	return 0;
 }
@@ -583,7 +1147,6 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
 {
 	struct dentry * dentry = file->f_path.dentry;
 
-	mutex_lock(&dentry->d_inode->i_mutex);
 	switch (origin) {
 		case 1:
 			offset += file->f_pos;
@@ -591,31 +1154,35 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
 			if (offset >= 0)
 				break;
 		default:
-			mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
 			return -EINVAL;
 	}
 	if (offset != file->f_pos) {
+		mutex_lock(&sysfs_mutex);
+
 		file->f_pos = offset;
 		if (file->f_pos >= 2) {
 			struct sysfs_dirent *sd = dentry->d_fsdata;
 			struct sysfs_dirent *cursor = file->private_data;
-			struct list_head *p;
+			struct sysfs_dirent **pos;
 			loff_t n = file->f_pos - 2;
 
-			list_del(&cursor->s_sibling);
-			p = sd->s_children.next;
-			while (n && p != &sd->s_children) {
-				struct sysfs_dirent *next;
-				next = list_entry(p, struct sysfs_dirent,
-						   s_sibling);
-				if (next->s_element)
+			sysfs_unlink_sibling(cursor);
+
+			pos = &sd->s_children;
+			while (n && *pos) {
+				struct sysfs_dirent *next = *pos;
+				if (sysfs_type(next))
 					n--;
-				p = p->next;
+				pos = &(*pos)->s_sibling;
 			}
-			list_add_tail(&cursor->s_sibling, p);
+
+			cursor->s_sibling = *pos;
+			*pos = cursor;
 		}
+
+		mutex_unlock(&sysfs_mutex);
 	}
-	mutex_unlock(&dentry->d_inode->i_mutex);
+
 	return offset;
 }
 
@@ -628,12 +1195,20 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
 int sysfs_make_shadowed_dir(struct kobject *kobj,
 	void * (*follow_link)(struct dentry *, struct nameidata *))
 {
+	struct dentry *dentry;
 	struct inode *inode;
 	struct inode_operations *i_op;
 
-	inode = kobj->dentry->d_inode;
-	if (inode->i_op != &sysfs_dir_inode_operations)
+	/* get dentry for @kobj->sd, dentry of a shadowed dir is pinned */
+	dentry = sysfs_get_dentry(kobj->sd);
+	if (IS_ERR(dentry))
+		return PTR_ERR(dentry);
+
+	inode = dentry->d_inode;
+	if (inode->i_op != &sysfs_dir_inode_operations) {
+		dput(dentry);
 		return -EINVAL;
+	}
 
 	i_op = kmalloc(sizeof(*i_op), GFP_KERNEL);
 	if (!i_op)
@@ -658,54 +1233,72 @@ int sysfs_make_shadowed_dir(struct kobject *kobj,
  *	directory.
  */
 
-struct dentry *sysfs_create_shadow_dir(struct kobject *kobj)
+struct sysfs_dirent *sysfs_create_shadow_dir(struct kobject *kobj)
 {
-	struct sysfs_dirent *sd;
-	struct dentry *parent, *dir, *shadow;
+	struct sysfs_dirent *parent_sd = kobj->sd->s_parent;
+	struct dentry *dir, *parent, *shadow;
 	struct inode *inode;
+	struct sysfs_dirent *sd;
+	struct sysfs_addrm_cxt acxt;
 
-	dir = kobj->dentry;
-	inode = dir->d_inode;
+	dir = sysfs_get_dentry(kobj->sd);
+	if (IS_ERR(dir)) {
+		sd = (void *)dir;
+		goto out;
+	}
 	parent = dir->d_parent;
-	shadow = ERR_PTR(-EINVAL);
+
+	inode = dir->d_inode;
+	sd = ERR_PTR(-EINVAL);
 	if (!sysfs_is_shadowed_inode(inode))
-		goto out;
+		goto out_dput;
 
 	shadow = d_alloc(parent, &dir->d_name);
 	if (!shadow)
 		goto nomem;
 
-	sd = __sysfs_make_dirent(shadow, kobj, inode->i_mode, SYSFS_DIR);
+	sd = sysfs_new_dirent("_SHADOW_", inode->i_mode, SYSFS_DIR);
 	if (!sd)
 		goto nomem;
+	sd->s_elem.dir.kobj = kobj;
 
+	sysfs_addrm_start(&acxt, parent_sd);
+
+	/* add but don't link into children list */
+	sysfs_add_one(&acxt, sd);
+
+	/* attach and instantiate dentry */
+	sysfs_attach_dentry(sd, shadow);
 	d_instantiate(shadow, igrab(inode));
-	inc_nlink(inode);
-	inc_nlink(parent->d_inode);
-	shadow->d_op = &sysfs_dentry_ops;
+	inc_nlink(inode);	/* tj: synchronization? */
+
+	sysfs_addrm_finish(&acxt);
 
 	dget(shadow);		/* Extra count - pin the dentry in core */
 
-out:
-	return shadow;
-nomem:
+	goto out_dput;
+
+ nomem:
 	dput(shadow);
-	shadow = ERR_PTR(-ENOMEM);
-	goto out;
+	sd = ERR_PTR(-ENOMEM);
+ out_dput:
+	dput(dir);
+ out:
+	return sd;
 }
 
 /**
  *	sysfs_remove_shadow_dir - remove an object's directory.
- *	@shadow: dentry of shadow directory
+ *	@shadow_sd: sysfs_dirent of shadow directory
  *
  *	The only thing special about this is that we remove any files in
  *	the directory before we remove the directory, and we've inlined
  *	what used to be sysfs_rmdir() below, instead of calling separately.
  */
 
-void sysfs_remove_shadow_dir(struct dentry *shadow)
+void sysfs_remove_shadow_dir(struct sysfs_dirent *shadow_sd)
 {
-	__sysfs_remove_dir(shadow);
+	__sysfs_remove_dir(shadow_sd);
 }
 
 const struct file_operations sysfs_dir_operations = {
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index b502c7197ec0..cc497994b2a8 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -50,29 +50,15 @@ static struct sysfs_ops subsys_sysfs_ops = {
 	.store	= subsys_attr_store,
 };
 
-/**
- *	add_to_collection - add buffer to a collection
- *	@buffer:	buffer to be added
- *	@node:		inode of set to add to
- */
-
-static inline void
-add_to_collection(struct sysfs_buffer *buffer, struct inode *node)
-{
-	struct sysfs_buffer_collection *set = node->i_private;
-
-	mutex_lock(&node->i_mutex);
-	list_add(&buffer->associates, &set->associates);
-	mutex_unlock(&node->i_mutex);
-}
-
-static inline void
-remove_from_collection(struct sysfs_buffer *buffer, struct inode *node)
-{
-	mutex_lock(&node->i_mutex);
-	list_del(&buffer->associates);
-	mutex_unlock(&node->i_mutex);
-}
+struct sysfs_buffer {
+	size_t			count;
+	loff_t			pos;
+	char			* page;
+	struct sysfs_ops	* ops;
+	struct semaphore	sem;
+	int			needs_read_fill;
+	int			event;
+};
 
 /**
  *	fill_read_buffer - allocate and fill buffer from object.
@@ -87,9 +73,8 @@ remove_from_collection(struct sysfs_buffer *buffer, struct inode *node)
  */
 static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer)
 {
-	struct sysfs_dirent * sd = dentry->d_fsdata;
-	struct attribute * attr = to_attr(dentry);
-	struct kobject * kobj = to_kobj(dentry->d_parent);
+	struct sysfs_dirent *attr_sd = dentry->d_fsdata;
+	struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
 	struct sysfs_ops * ops = buffer->ops;
 	int ret = 0;
 	ssize_t count;
@@ -99,8 +84,15 @@ static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer
 	if (!buffer->page)
 		return -ENOMEM;
 
-	buffer->event = atomic_read(&sd->s_event);
-	count = ops->show(kobj,attr,buffer->page);
+	/* need attr_sd for attr and ops, its parent for kobj */
+	if (!sysfs_get_active_two(attr_sd))
+		return -ENODEV;
+
+	buffer->event = atomic_read(&attr_sd->s_event);
+	count = ops->show(kobj, attr_sd->s_elem.attr.attr, buffer->page);
+
+	sysfs_put_active_two(attr_sd);
+
 	BUG_ON(count > (ssize_t)PAGE_SIZE);
 	if (count >= 0) {
 		buffer->needs_read_fill = 0;
@@ -138,10 +130,7 @@ sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 
 	down(&buffer->sem);
 	if (buffer->needs_read_fill) {
-		if (buffer->orphaned)
-			retval = -ENODEV;
-		else
-			retval = fill_read_buffer(file->f_path.dentry,buffer);
+		retval = fill_read_buffer(file->f_path.dentry,buffer);
 		if (retval)
 			goto out;
 	}
@@ -196,14 +185,23 @@ fill_write_buffer(struct sysfs_buffer * buffer, const char __user * buf, size_t
  *	passing the buffer that we acquired in fill_write_buffer().
  */
 
-static int 
+static int
 flush_write_buffer(struct dentry * dentry, struct sysfs_buffer * buffer, size_t count)
 {
-	struct attribute * attr = to_attr(dentry);
-	struct kobject * kobj = to_kobj(dentry->d_parent);
+	struct sysfs_dirent *attr_sd = dentry->d_fsdata;
+	struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
 	struct sysfs_ops * ops = buffer->ops;
+	int rc;
+
+	/* need attr_sd for attr and ops, its parent for kobj */
+	if (!sysfs_get_active_two(attr_sd))
+		return -ENODEV;
+
+	rc = ops->store(kobj, attr_sd->s_elem.attr.attr, buffer->page, count);
 
-	return ops->store(kobj,attr,buffer->page,count);
+	sysfs_put_active_two(attr_sd);
+
+	return rc;
 }
 
 
@@ -231,37 +229,26 @@ sysfs_write_file(struct file *file, const char __user *buf, size_t count, loff_t
 	ssize_t len;
 
 	down(&buffer->sem);
-	if (buffer->orphaned) {
-		len = -ENODEV;
-		goto out;
-	}
 	len = fill_write_buffer(buffer, buf, count);
 	if (len > 0)
 		len = flush_write_buffer(file->f_path.dentry, buffer, len);
 	if (len > 0)
 		*ppos += len;
-out:
 	up(&buffer->sem);
 	return len;
 }
 
 static int sysfs_open_file(struct inode *inode, struct file *file)
 {
-	struct kobject *kobj = sysfs_get_kobject(file->f_path.dentry->d_parent);
-	struct attribute * attr = to_attr(file->f_path.dentry);
-	struct sysfs_buffer_collection *set;
+	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+	struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
 	struct sysfs_buffer * buffer;
 	struct sysfs_ops * ops = NULL;
-	int error = 0;
-
-	if (!kobj || !attr)
-		goto Einval;
+	int error;
 
-	/* Grab the module reference for this attribute if we have one */
-	if (!try_module_get(attr->owner)) {
-		error = -ENODEV;
-		goto Done;
-	}
+	/* need attr_sd for attr and ops, its parent for kobj */
+	if (!sysfs_get_active_two(attr_sd))
+		return -ENODEV;
 
 	/* if the kobject has no ktype, then we assume that it is a subsystem
 	 * itself, and use ops for it.
@@ -273,33 +260,21 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
 	else
 		ops = &subsys_sysfs_ops;
 
+	error = -EACCES;
+
 	/* No sysfs operations, either from having no subsystem,
 	 * or the subsystem have no operations.
 	 */
 	if (!ops)
-		goto Eaccess;
-
-	/* make sure we have a collection to add our buffers to */
-	mutex_lock(&inode->i_mutex);
-	if (!(set = inode->i_private)) {
-		if (!(set = inode->i_private = kmalloc(sizeof(struct sysfs_buffer_collection), GFP_KERNEL))) {
-			error = -ENOMEM;
-			goto Done;
-		} else {
-			INIT_LIST_HEAD(&set->associates);
-		}
-	}
-	mutex_unlock(&inode->i_mutex);
+		goto err_out;
 
 	/* File needs write support.
 	 * The inode's perms must say it's ok, 
 	 * and we must have a store method.
 	 */
 	if (file->f_mode & FMODE_WRITE) {
-
 		if (!(inode->i_mode & S_IWUGO) || !ops->store)
-			goto Eaccess;
-
+			goto err_out;
 	}
 
 	/* File needs read support.
@@ -308,48 +283,38 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
 	 */
 	if (file->f_mode & FMODE_READ) {
 		if (!(inode->i_mode & S_IRUGO) || !ops->show)
-			goto Eaccess;
+			goto err_out;
 	}
 
 	/* No error? Great, allocate a buffer for the file, and store it
 	 * it in file->private_data for easy access.
 	 */
+	error = -ENOMEM;
 	buffer = kzalloc(sizeof(struct sysfs_buffer), GFP_KERNEL);
-	if (buffer) {
-		INIT_LIST_HEAD(&buffer->associates);
-		init_MUTEX(&buffer->sem);
-		buffer->needs_read_fill = 1;
-		buffer->ops = ops;
-		add_to_collection(buffer, inode);
-		file->private_data = buffer;
-	} else
-		error = -ENOMEM;
-	goto Done;
-
- Einval:
-	error = -EINVAL;
-	goto Done;
- Eaccess:
-	error = -EACCES;
-	module_put(attr->owner);
- Done:
-	if (error)
-		kobject_put(kobj);
+	if (!buffer)
+		goto err_out;
+
+	init_MUTEX(&buffer->sem);
+	buffer->needs_read_fill = 1;
+	buffer->ops = ops;
+	file->private_data = buffer;
+
+	/* open succeeded, put active references and pin attr_sd */
+	sysfs_put_active_two(attr_sd);
+	sysfs_get(attr_sd);
+	return 0;
+
+ err_out:
+	sysfs_put_active_two(attr_sd);
 	return error;
 }
 
 static int sysfs_release(struct inode * inode, struct file * filp)
 {
-	struct kobject * kobj = to_kobj(filp->f_path.dentry->d_parent);
-	struct attribute * attr = to_attr(filp->f_path.dentry);
-	struct module * owner = attr->owner;
-	struct sysfs_buffer * buffer = filp->private_data;
+	struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
+	struct sysfs_buffer *buffer = filp->private_data;
 
-	if (buffer)
-		remove_from_collection(buffer, inode);
-	kobject_put(kobj);
-	/* After this point, attr should not be accessed. */
-	module_put(owner);
+	sysfs_put(attr_sd);
 
 	if (buffer) {
 		if (buffer->page)
@@ -376,57 +341,43 @@ static int sysfs_release(struct inode * inode, struct file * filp)
 static unsigned int sysfs_poll(struct file *filp, poll_table *wait)
 {
 	struct sysfs_buffer * buffer = filp->private_data;
-	struct kobject * kobj = to_kobj(filp->f_path.dentry->d_parent);
-	struct sysfs_dirent * sd = filp->f_path.dentry->d_fsdata;
-	int res = 0;
+	struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
+	struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
+
+	/* need parent for the kobj, grab both */
+	if (!sysfs_get_active_two(attr_sd))
+		goto trigger;
 
 	poll_wait(filp, &kobj->poll, wait);
 
-	if (buffer->event != atomic_read(&sd->s_event)) {
-		res = POLLERR|POLLPRI;
-		buffer->needs_read_fill = 1;
-	}
+	sysfs_put_active_two(attr_sd);
 
-	return res;
-}
+	if (buffer->event != atomic_read(&attr_sd->s_event))
+		goto trigger;
 
+	return 0;
 
-static struct dentry *step_down(struct dentry *dir, const char * name)
-{
-	struct dentry * de;
-
-	if (dir == NULL || dir->d_inode == NULL)
-		return NULL;
-
-	mutex_lock(&dir->d_inode->i_mutex);
-	de = lookup_one_len(name, dir, strlen(name));
-	mutex_unlock(&dir->d_inode->i_mutex);
-	dput(dir);
-	if (IS_ERR(de))
-		return NULL;
-	if (de->d_inode == NULL) {
-		dput(de);
-		return NULL;
-	}
-	return de;
+ trigger:
+	buffer->needs_read_fill = 1;
+	return POLLERR|POLLPRI;
 }
 
-void sysfs_notify(struct kobject * k, char *dir, char *attr)
+void sysfs_notify(struct kobject *k, char *dir, char *attr)
 {
-	struct dentry *de = k->dentry;
-	if (de)
-		dget(de);
-	if (de && dir)
-		de = step_down(de, dir);
-	if (de && attr)
-		de = step_down(de, attr);
-	if (de) {
-		struct sysfs_dirent * sd = de->d_fsdata;
-		if (sd)
-			atomic_inc(&sd->s_event);
+	struct sysfs_dirent *sd = k->sd;
+
+	mutex_lock(&sysfs_mutex);
+
+	if (sd && dir)
+		sd = sysfs_find_dirent(sd, dir);
+	if (sd && attr)
+		sd = sysfs_find_dirent(sd, attr);
+	if (sd) {
+		atomic_inc(&sd->s_event);
 		wake_up_interruptible(&k->poll);
-		dput(de);
 	}
+
+	mutex_unlock(&sysfs_mutex);
 }
 EXPORT_SYMBOL_GPL(sysfs_notify);
 
@@ -440,19 +391,30 @@ const struct file_operations sysfs_file_operations = {
 };
 
 
-int sysfs_add_file(struct dentry * dir, const struct attribute * attr, int type)
+int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr,
+		   int type)
 {
-	struct sysfs_dirent * parent_sd = dir->d_fsdata;
 	umode_t mode = (attr->mode & S_IALLUGO) | S_IFREG;
-	int error = -EEXIST;
+	struct sysfs_addrm_cxt acxt;
+	struct sysfs_dirent *sd;
 
-	mutex_lock(&dir->d_inode->i_mutex);
-	if (!sysfs_dirent_exist(parent_sd, attr->name))
-		error = sysfs_make_dirent(parent_sd, NULL, (void *)attr,
-					  mode, type);
-	mutex_unlock(&dir->d_inode->i_mutex);
+	sd = sysfs_new_dirent(attr->name, mode, type);
+	if (!sd)
+		return -ENOMEM;
+	sd->s_elem.attr.attr = (void *)attr;
 
-	return error;
+	sysfs_addrm_start(&acxt, dir_sd);
+
+	if (!sysfs_find_dirent(dir_sd, attr->name)) {
+		sysfs_add_one(&acxt, sd);
+		sysfs_link_sibling(sd);
+	}
+
+	if (sysfs_addrm_finish(&acxt))
+		return 0;
+
+	sysfs_put(sd);
+	return -EEXIST;
 }
 
 
@@ -464,9 +426,9 @@ int sysfs_add_file(struct dentry * dir, const struct attribute * attr, int type)
 
 int sysfs_create_file(struct kobject * kobj, const struct attribute * attr)
 {
-	BUG_ON(!kobj || !kobj->dentry || !attr);
+	BUG_ON(!kobj || !kobj->sd || !attr);
 
-	return sysfs_add_file(kobj->dentry, attr, SYSFS_KOBJ_ATTR);
+	return sysfs_add_file(kobj->sd, attr, SYSFS_KOBJ_ATTR);
 
 }
 
@@ -480,16 +442,16 @@ int sysfs_create_file(struct kobject * kobj, const struct attribute * attr)
 int sysfs_add_file_to_group(struct kobject *kobj,
 		const struct attribute *attr, const char *group)
 {
-	struct dentry *dir;
+	struct sysfs_dirent *dir_sd;
 	int error;
 
-	dir = lookup_one_len(group, kobj->dentry, strlen(group));
-	if (IS_ERR(dir))
-		error = PTR_ERR(dir);
-	else {
-		error = sysfs_add_file(dir, attr, SYSFS_KOBJ_ATTR);
-		dput(dir);
-	}
+	dir_sd = sysfs_get_dirent(kobj->sd, group);
+	if (!dir_sd)
+		return -ENOENT;
+
+	error = sysfs_add_file(dir_sd, attr, SYSFS_KOBJ_ATTR);
+	sysfs_put(dir_sd);
+
 	return error;
 }
 EXPORT_SYMBOL_GPL(sysfs_add_file_to_group);
@@ -502,30 +464,31 @@ EXPORT_SYMBOL_GPL(sysfs_add_file_to_group);
  */
 int sysfs_update_file(struct kobject * kobj, const struct attribute * attr)
 {
-	struct dentry * dir = kobj->dentry;
-	struct dentry * victim;
-	int res = -ENOENT;
-
-	mutex_lock(&dir->d_inode->i_mutex);
-	victim = lookup_one_len(attr->name, dir, strlen(attr->name));
-	if (!IS_ERR(victim)) {
-		/* make sure dentry is really there */
-		if (victim->d_inode && 
-		    (victim->d_parent->d_inode == dir->d_inode)) {
-			victim->d_inode->i_mtime = CURRENT_TIME;
-			fsnotify_modify(victim);
-			res = 0;
-		} else
-			d_drop(victim);
-		
-		/**
-		 * Drop the reference acquired from lookup_one_len() above.
-		 */
-		dput(victim);
+	struct sysfs_dirent *victim_sd = NULL;
+	struct dentry *victim = NULL;
+	int rc;
+
+	rc = -ENOENT;
+	victim_sd = sysfs_get_dirent(kobj->sd, attr->name);
+	if (!victim_sd)
+		goto out;
+
+	victim = sysfs_get_dentry(victim_sd);
+	if (IS_ERR(victim)) {
+		rc = PTR_ERR(victim);
+		victim = NULL;
+		goto out;
 	}
-	mutex_unlock(&dir->d_inode->i_mutex);
 
-	return res;
+	mutex_lock(&victim->d_inode->i_mutex);
+	victim->d_inode->i_mtime = CURRENT_TIME;
+	fsnotify_modify(victim);
+	mutex_unlock(&victim->d_inode->i_mutex);
+	rc = 0;
+ out:
+	dput(victim);
+	sysfs_put(victim_sd);
+	return rc;
 }
 
 
@@ -538,30 +501,34 @@ int sysfs_update_file(struct kobject * kobj, const struct attribute * attr)
  */
 int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
 {
-	struct dentry *dir = kobj->dentry;
-	struct dentry *victim;
+	struct sysfs_dirent *victim_sd = NULL;
+	struct dentry *victim = NULL;
 	struct inode * inode;
 	struct iattr newattrs;
-	int res = -ENOENT;
-
-	mutex_lock(&dir->d_inode->i_mutex);
-	victim = lookup_one_len(attr->name, dir, strlen(attr->name));
-	if (!IS_ERR(victim)) {
-		if (victim->d_inode &&
-		    (victim->d_parent->d_inode == dir->d_inode)) {
-			inode = victim->d_inode;
-			mutex_lock(&inode->i_mutex);
-			newattrs.ia_mode = (mode & S_IALLUGO) |
-						(inode->i_mode & ~S_IALLUGO);
-			newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
-			res = notify_change(victim, &newattrs);
-			mutex_unlock(&inode->i_mutex);
-		}
-		dput(victim);
+	int rc;
+
+	rc = -ENOENT;
+	victim_sd = sysfs_get_dirent(kobj->sd, attr->name);
+	if (!victim_sd)
+		goto out;
+
+	victim = sysfs_get_dentry(victim_sd);
+	if (IS_ERR(victim)) {
+		rc = PTR_ERR(victim);
+		victim = NULL;
+		goto out;
 	}
-	mutex_unlock(&dir->d_inode->i_mutex);
 
-	return res;
+	inode = victim->d_inode;
+	mutex_lock(&inode->i_mutex);
+	newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
+	newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
+	rc = notify_change(victim, &newattrs);
+	mutex_unlock(&inode->i_mutex);
+ out:
+	dput(victim);
+	sysfs_put(victim_sd);
+	return rc;
 }
 EXPORT_SYMBOL_GPL(sysfs_chmod_file);
 
@@ -576,7 +543,7 @@ EXPORT_SYMBOL_GPL(sysfs_chmod_file);
 
 void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
 {
-	sysfs_hash_and_remove(kobj->dentry, attr->name);
+	sysfs_hash_and_remove(kobj->sd, attr->name);
 }
 
 
@@ -589,12 +556,12 @@ void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
 void sysfs_remove_file_from_group(struct kobject *kobj,
 		const struct attribute *attr, const char *group)
 {
-	struct dentry *dir;
+	struct sysfs_dirent *dir_sd;
 
-	dir = lookup_one_len(group, kobj->dentry, strlen(group));
-	if (!IS_ERR(dir)) {
-		sysfs_hash_and_remove(dir, attr->name);
-		dput(dir);
+	dir_sd = sysfs_get_dirent(kobj->sd, group);
+	if (dir_sd) {
+		sysfs_hash_and_remove(dir_sd, attr->name);
+		sysfs_put(dir_sd);
 	}
 }
 EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group);
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 52eed2a7a5ef..f318b73c790c 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -18,26 +18,25 @@
 #include "sysfs.h"
 
 
-static void remove_files(struct dentry * dir, 
-			 const struct attribute_group * grp)
+static void remove_files(struct sysfs_dirent *dir_sd,
+			 const struct attribute_group *grp)
 {
 	struct attribute *const* attr;
 
 	for (attr = grp->attrs; *attr; attr++)
-		sysfs_hash_and_remove(dir,(*attr)->name);
+		sysfs_hash_and_remove(dir_sd, (*attr)->name);
 }
 
-static int create_files(struct dentry * dir,
-			const struct attribute_group * grp)
+static int create_files(struct sysfs_dirent *dir_sd,
+			const struct attribute_group *grp)
 {
 	struct attribute *const* attr;
 	int error = 0;
 
-	for (attr = grp->attrs; *attr && !error; attr++) {
-		error = sysfs_add_file(dir, *attr, SYSFS_KOBJ_ATTR);
-	}
+	for (attr = grp->attrs; *attr && !error; attr++)
+		error = sysfs_add_file(dir_sd, *attr, SYSFS_KOBJ_ATTR);
 	if (error)
-		remove_files(dir,grp);
+		remove_files(dir_sd, grp);
 	return error;
 }
 
@@ -45,44 +44,44 @@ static int create_files(struct dentry * dir,
 int sysfs_create_group(struct kobject * kobj, 
 		       const struct attribute_group * grp)
 {
-	struct dentry * dir;
+	struct sysfs_dirent *sd;
 	int error;
 
-	BUG_ON(!kobj || !kobj->dentry);
+	BUG_ON(!kobj || !kobj->sd);
 
 	if (grp->name) {
-		error = sysfs_create_subdir(kobj,grp->name,&dir);
+		error = sysfs_create_subdir(kobj, grp->name, &sd);
 		if (error)
 			return error;
 	} else
-		dir = kobj->dentry;
-	dir = dget(dir);
-	if ((error = create_files(dir,grp))) {
+		sd = kobj->sd;
+	sysfs_get(sd);
+	error = create_files(sd, grp);
+	if (error) {
 		if (grp->name)
-			sysfs_remove_subdir(dir);
+			sysfs_remove_subdir(sd);
 	}
-	dput(dir);
+	sysfs_put(sd);
 	return error;
 }
 
 void sysfs_remove_group(struct kobject * kobj, 
 			const struct attribute_group * grp)
 {
-	struct dentry * dir;
+	struct sysfs_dirent *dir_sd = kobj->sd;
+	struct sysfs_dirent *sd;
 
 	if (grp->name) {
-		dir = lookup_one_len_kern(grp->name, kobj->dentry,
-				strlen(grp->name));
-		BUG_ON(IS_ERR(dir));
-	}
-	else
-		dir = dget(kobj->dentry);
+		sd = sysfs_get_dirent(dir_sd, grp->name);
+		BUG_ON(!sd);
+	} else
+		sd = sysfs_get(dir_sd);
 
-	remove_files(dir,grp);
+	remove_files(sd, grp);
 	if (grp->name)
-		sysfs_remove_subdir(dir);
-	/* release the ref. taken in this routine */
-	dput(dir);
+		sysfs_remove_subdir(sd);
+
+	sysfs_put(sd);
 }
 
 
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 5266eec15f6e..3756e152285a 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -133,187 +133,94 @@ static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
  */
 static struct lock_class_key sysfs_inode_imutex_key;
 
-struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent * sd)
+void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
 {
-	struct inode * inode = new_inode(sysfs_sb);
-	if (inode) {
-		inode->i_blocks = 0;
-		inode->i_mapping->a_ops = &sysfs_aops;
-		inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
-		inode->i_op = &sysfs_inode_operations;
-		inode->i_ino = sd->s_ino;
-		lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key);
-
-		if (sd->s_iattr) {
-			/* sysfs_dirent has non-default attributes
-			 * get them for the new inode from persistent copy
-			 * in sysfs_dirent
-			 */
-			set_inode_attr(inode, sd->s_iattr);
-		} else
-			set_default_inode_attr(inode, mode);
-	}
-	return inode;
-}
-
-int sysfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *))
-{
-	int error = 0;
-	struct inode * inode = NULL;
-	if (dentry) {
-		if (!dentry->d_inode) {
-			struct sysfs_dirent * sd = dentry->d_fsdata;
-			if ((inode = sysfs_new_inode(mode, sd))) {
-				if (dentry->d_parent && dentry->d_parent->d_inode) {
-					struct inode *p_inode = dentry->d_parent->d_inode;
-					p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME;
-				}
-				goto Proceed;
-			}
-			else 
-				error = -ENOMEM;
-		} else
-			error = -EEXIST;
-	} else 
-		error = -ENOENT;
-	goto Done;
-
- Proceed:
-	if (init)
-		error = init(inode);
-	if (!error) {
-		d_instantiate(dentry, inode);
-		if (S_ISDIR(mode))
-			dget(dentry);  /* pin only directory dentry in core */
+	inode->i_blocks = 0;
+	inode->i_mapping->a_ops = &sysfs_aops;
+	inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
+	inode->i_op = &sysfs_inode_operations;
+	inode->i_ino = sd->s_ino;
+	lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key);
+
+	if (sd->s_iattr) {
+		/* sysfs_dirent has non-default attributes
+		 * get them for the new inode from persistent copy
+		 * in sysfs_dirent
+		 */
+		set_inode_attr(inode, sd->s_iattr);
 	} else
-		iput(inode);
- Done:
-	return error;
+		set_default_inode_attr(inode, sd->s_mode);
 }
 
-/*
- * Get the name for corresponding element represented by the given sysfs_dirent
+/**
+ *	sysfs_get_inode - get inode for sysfs_dirent
+ *	@sd: sysfs_dirent to allocate inode for
+ *
+ *	Get inode for @sd.  If such inode doesn't exist, a new inode
+ *	is allocated and basics are initialized.  New inode is
+ *	returned locked.
+ *
+ *	LOCKING:
+ *	Kernel thread context (may sleep).
+ *
+ *	RETURNS:
+ *	Pointer to allocated inode on success, NULL on failure.
  */
-const unsigned char * sysfs_get_name(struct sysfs_dirent *sd)
+struct inode * sysfs_get_inode(struct sysfs_dirent *sd)
 {
-	struct attribute * attr;
-	struct bin_attribute * bin_attr;
-	struct sysfs_symlink  * sl;
-
-	BUG_ON(!sd || !sd->s_element);
-
-	switch (sd->s_type) {
-		case SYSFS_DIR:
-			/* Always have a dentry so use that */
-			return sd->s_dentry->d_name.name;
-
-		case SYSFS_KOBJ_ATTR:
-			attr = sd->s_element;
-			return attr->name;
-
-		case SYSFS_KOBJ_BIN_ATTR:
-			bin_attr = sd->s_element;
-			return bin_attr->attr.name;
+	struct inode *inode;
 
-		case SYSFS_KOBJ_LINK:
-			sl = sd->s_element;
-			return sl->link_name;
-	}
-	return NULL;
-}
+	inode = iget_locked(sysfs_sb, sd->s_ino);
+	if (inode && (inode->i_state & I_NEW))
+		sysfs_init_inode(sd, inode);
 
-static inline void orphan_all_buffers(struct inode *node)
-{
-	struct sysfs_buffer_collection *set;
-	struct sysfs_buffer *buf;
-
-	mutex_lock_nested(&node->i_mutex, I_MUTEX_CHILD);
-	set = node->i_private;
-	if (set) {
-		list_for_each_entry(buf, &set->associates, associates) {
-			down(&buf->sem);
-			buf->orphaned = 1;
-			up(&buf->sem);
-		}
-	}
-	mutex_unlock(&node->i_mutex);
+	return inode;
 }
 
-
-/*
- * Unhashes the dentry corresponding to given sysfs_dirent
- * Called with parent inode's i_mutex held.
+/**
+ *	sysfs_instantiate - instantiate dentry
+ *	@dentry: dentry to be instantiated
+ *	@inode: inode associated with @sd
+ *
+ *	Unlock @inode if locked and instantiate @dentry with @inode.
+ *
+ *	LOCKING:
+ *	None.
  */
-void sysfs_drop_dentry(struct sysfs_dirent * sd, struct dentry * parent)
+void sysfs_instantiate(struct dentry *dentry, struct inode *inode)
 {
-	struct dentry *dentry = NULL;
-	struct inode *inode;
+	BUG_ON(!dentry || dentry->d_inode);
 
-	/* We're not holding a reference to ->s_dentry dentry but the
-	 * field will stay valid as long as sysfs_lock is held.
-	 */
-	spin_lock(&sysfs_lock);
-	spin_lock(&dcache_lock);
-
-	/* dget dentry if it's still alive */
-	if (sd->s_dentry && sd->s_dentry->d_inode)
-		dentry = dget_locked(sd->s_dentry);
-
-	spin_unlock(&dcache_lock);
-	spin_unlock(&sysfs_lock);
-
-	/* drop dentry */
-	if (dentry) {
-		spin_lock(&dcache_lock);
-		spin_lock(&dentry->d_lock);
-		if (!d_unhashed(dentry) && dentry->d_inode) {
-			inode = dentry->d_inode;
-			spin_lock(&inode->i_lock);
-			__iget(inode);
-			spin_unlock(&inode->i_lock);
-			dget_locked(dentry);
-			__d_drop(dentry);
-			spin_unlock(&dentry->d_lock);
-			spin_unlock(&dcache_lock);
-			simple_unlink(parent->d_inode, dentry);
-			orphan_all_buffers(inode);
-			iput(inode);
-		} else {
-			spin_unlock(&dentry->d_lock);
-			spin_unlock(&dcache_lock);
-		}
+	if (inode->i_state & I_NEW)
+		unlock_new_inode(inode);
 
-		dput(dentry);
-	}
+	d_instantiate(dentry, inode);
 }
 
-int sysfs_hash_and_remove(struct dentry * dir, const char * name)
+int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name)
 {
-	struct sysfs_dirent * sd;
-	struct sysfs_dirent * parent_sd;
-	int found = 0;
+	struct sysfs_addrm_cxt acxt;
+	struct sysfs_dirent **pos, *sd;
 
-	if (!dir)
+	if (!dir_sd)
 		return -ENOENT;
 
-	if (dir->d_inode == NULL)
-		/* no inode means this hasn't been made visible yet */
-		return -ENOENT;
+	sysfs_addrm_start(&acxt, dir_sd);
+
+	for (pos = &dir_sd->s_children; *pos; pos = &(*pos)->s_sibling) {
+		sd = *pos;
 
-	parent_sd = dir->d_fsdata;
-	mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
-	list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
-		if (!sd->s_element)
+		if (!sysfs_type(sd))
 			continue;
-		if (!strcmp(sysfs_get_name(sd), name)) {
-			list_del_init(&sd->s_sibling);
-			sysfs_drop_dentry(sd, dir);
-			sysfs_put(sd);
-			found = 1;
+		if (!strcmp(sd->s_name, name)) {
+			*pos = sd->s_sibling;
+			sd->s_sibling = NULL;
+			sysfs_remove_one(&acxt, sd);
 			break;
 		}
 	}
-	mutex_unlock(&dir->d_inode->i_mutex);
 
-	return found ? 0 : -ENOENT;
+	if (sysfs_addrm_finish(&acxt))
+		return 0;
+	return -ENOENT;
 }
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 00ab9125d398..402cc356203c 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -19,28 +19,18 @@ struct vfsmount *sysfs_mount;
 struct super_block * sysfs_sb = NULL;
 struct kmem_cache *sysfs_dir_cachep;
 
-static void sysfs_clear_inode(struct inode *inode);
-
 static const struct super_operations sysfs_ops = {
 	.statfs		= simple_statfs,
 	.drop_inode	= sysfs_delete_inode,
-	.clear_inode	= sysfs_clear_inode,
 };
 
-static struct sysfs_dirent sysfs_root = {
-	.s_sibling	= LIST_HEAD_INIT(sysfs_root.s_sibling),
-	.s_children	= LIST_HEAD_INIT(sysfs_root.s_children),
-	.s_element	= NULL,
-	.s_type		= SYSFS_ROOT,
-	.s_iattr	= NULL,
+struct sysfs_dirent sysfs_root = {
+	.s_count	= ATOMIC_INIT(1),
+	.s_flags	= SYSFS_ROOT,
+	.s_mode		= S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
 	.s_ino		= 1,
 };
 
-static void sysfs_clear_inode(struct inode *inode)
-{
-	kfree(inode->i_private);
-}
-
 static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
 {
 	struct inode *inode;
@@ -53,24 +43,26 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_time_gran = 1;
 	sysfs_sb = sb;
 
-	inode = sysfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
-				 &sysfs_root);
-	if (inode) {
-		inode->i_op = &sysfs_dir_inode_operations;
-		inode->i_fop = &sysfs_dir_operations;
-		/* directory inodes start off with i_nlink == 2 (for "." entry) */
-		inc_nlink(inode);
-	} else {
+	inode = new_inode(sysfs_sb);
+	if (!inode) {
 		pr_debug("sysfs: could not get root inode\n");
 		return -ENOMEM;
 	}
 
+	sysfs_init_inode(&sysfs_root, inode);
+
+	inode->i_op = &sysfs_dir_inode_operations;
+	inode->i_fop = &sysfs_dir_operations;
+	/* directory inodes start off with i_nlink == 2 (for "." entry) */
+	inc_nlink(inode);
+
 	root = d_alloc_root(inode);
 	if (!root) {
 		pr_debug("%s: could not get root dentry!\n",__FUNCTION__);
 		iput(inode);
 		return -ENOMEM;
 	}
+	sysfs_root.s_dentry = root;
 	root->d_fsdata = &sysfs_root;
 	sb->s_root = root;
 	return 0;
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 7b9c5bfde920..2f86e0422290 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -11,71 +11,39 @@
 
 #include "sysfs.h"
 
-static int object_depth(struct kobject * kobj)
+static int object_depth(struct sysfs_dirent *sd)
 {
-	struct kobject * p = kobj;
 	int depth = 0;
-	do { depth++; } while ((p = p->parent));
+
+	for (; sd->s_parent; sd = sd->s_parent)
+		depth++;
+
 	return depth;
 }
 
-static int object_path_length(struct kobject * kobj)
+static int object_path_length(struct sysfs_dirent * sd)
 {
-	struct kobject * p = kobj;
 	int length = 1;
-	do {
-		length += strlen(kobject_name(p)) + 1;
-		p = p->parent;
-	} while (p);
+
+	for (; sd->s_parent; sd = sd->s_parent)
+		length += strlen(sd->s_name) + 1;
+
 	return length;
 }
 
-static void fill_object_path(struct kobject * kobj, char * buffer, int length)
+static void fill_object_path(struct sysfs_dirent *sd, char *buffer, int length)
 {
-	struct kobject * p;
-
 	--length;
-	for (p = kobj; p; p = p->parent) {
-		int cur = strlen(kobject_name(p));
+	for (; sd->s_parent; sd = sd->s_parent) {
+		int cur = strlen(sd->s_name);
 
 		/* back up enough to print this bus id with '/' */
 		length -= cur;
-		strncpy(buffer + length,kobject_name(p),cur);
+		strncpy(buffer + length, sd->s_name, cur);
 		*(buffer + --length) = '/';
 	}
 }
 
-static int sysfs_add_link(struct dentry * parent, const char * name, struct kobject * target)
-{
-	struct sysfs_dirent * parent_sd = parent->d_fsdata;
-	struct sysfs_symlink * sl;
-	int error = 0;
-
-	error = -ENOMEM;
-	sl = kmalloc(sizeof(*sl), GFP_KERNEL);
-	if (!sl)
-		goto exit1;
-
-	sl->link_name = kmalloc(strlen(name) + 1, GFP_KERNEL);
-	if (!sl->link_name)
-		goto exit2;
-
-	strcpy(sl->link_name, name);
-	sl->target_kobj = kobject_get(target);
-
-	error = sysfs_make_dirent(parent_sd, NULL, sl, S_IFLNK|S_IRWXUGO,
-				SYSFS_KOBJ_LINK);
-	if (!error)
-		return 0;
-
-	kobject_put(target);
-	kfree(sl->link_name);
-exit2:
-	kfree(sl);
-exit1:
-	return error;
-}
-
 /**
  *	sysfs_create_link - create symlink between two objects.
  *	@kobj:	object whose directory we're creating the link in.
@@ -84,24 +52,57 @@ exit1:
  */
 int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name)
 {
-	struct dentry *dentry = NULL;
-	int error = -EEXIST;
+	struct sysfs_dirent *parent_sd = NULL;
+	struct sysfs_dirent *target_sd = NULL;
+	struct sysfs_dirent *sd = NULL;
+	struct sysfs_addrm_cxt acxt;
+	int error;
 
 	BUG_ON(!name);
 
 	if (!kobj) {
 		if (sysfs_mount && sysfs_mount->mnt_sb)
-			dentry = sysfs_mount->mnt_sb->s_root;
+			parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata;
 	} else
-		dentry = kobj->dentry;
+		parent_sd = kobj->sd;
+
+	error = -EFAULT;
+	if (!parent_sd)
+		goto out_put;
+
+	/* target->sd can go away beneath us but is protected with
+	 * sysfs_assoc_lock.  Fetch target_sd from it.
+	 */
+	spin_lock(&sysfs_assoc_lock);
+	if (target->sd)
+		target_sd = sysfs_get(target->sd);
+	spin_unlock(&sysfs_assoc_lock);
+
+	error = -ENOENT;
+	if (!target_sd)
+		goto out_put;
+
+	error = -ENOMEM;
+	sd = sysfs_new_dirent(name, S_IFLNK|S_IRWXUGO, SYSFS_KOBJ_LINK);
+	if (!sd)
+		goto out_put;
+	sd->s_elem.symlink.target_sd = target_sd;
 
-	if (!dentry)
-		return -EFAULT;
+	sysfs_addrm_start(&acxt, parent_sd);
 
-	mutex_lock(&dentry->d_inode->i_mutex);
-	if (!sysfs_dirent_exist(dentry->d_fsdata, name))
-		error = sysfs_add_link(dentry, name, target);
-	mutex_unlock(&dentry->d_inode->i_mutex);
+	if (!sysfs_find_dirent(parent_sd, name)) {
+		sysfs_add_one(&acxt, sd);
+		sysfs_link_sibling(sd);
+	}
+
+	if (sysfs_addrm_finish(&acxt))
+		return 0;
+
+	error = -EEXIST;
+	/* fall through */
+ out_put:
+	sysfs_put(target_sd);
+	sysfs_put(sd);
 	return error;
 }
 
@@ -114,17 +115,17 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char
 
 void sysfs_remove_link(struct kobject * kobj, const char * name)
 {
-	sysfs_hash_and_remove(kobj->dentry,name);
+	sysfs_hash_and_remove(kobj->sd, name);
 }
 
-static int sysfs_get_target_path(struct kobject * kobj, struct kobject * target,
-				 char *path)
+static int sysfs_get_target_path(struct sysfs_dirent * parent_sd,
+				 struct sysfs_dirent * target_sd, char *path)
 {
 	char * s;
 	int depth, size;
 
-	depth = object_depth(kobj);
-	size = object_path_length(target) + depth * 3 - 1;
+	depth = object_depth(parent_sd);
+	size = object_path_length(target_sd) + depth * 3 - 1;
 	if (size > PATH_MAX)
 		return -ENAMETOOLONG;
 
@@ -133,7 +134,7 @@ static int sysfs_get_target_path(struct kobject * kobj, struct kobject * target,
 	for (s = path; depth--; s += 3)
 		strcpy(s,"../");
 
-	fill_object_path(target, path, size);
+	fill_object_path(target_sd, path, size);
 	pr_debug("%s: path = '%s'\n", __FUNCTION__, path);
 
 	return 0;
@@ -141,27 +142,16 @@ static int sysfs_get_target_path(struct kobject * kobj, struct kobject * target,
 
 static int sysfs_getlink(struct dentry *dentry, char * path)
 {
-	struct kobject *kobj, *target_kobj;
-	int error = 0;
+	struct sysfs_dirent *sd = dentry->d_fsdata;
+	struct sysfs_dirent *parent_sd = sd->s_parent;
+	struct sysfs_dirent *target_sd = sd->s_elem.symlink.target_sd;
+	int error;
 
-	kobj = sysfs_get_kobject(dentry->d_parent);
-	if (!kobj)
-		return -EINVAL;
+	mutex_lock(&sysfs_mutex);
+	error = sysfs_get_target_path(parent_sd, target_sd, path);
+	mutex_unlock(&sysfs_mutex);
 
-	target_kobj = sysfs_get_kobject(dentry);
-	if (!target_kobj) {
-		kobject_put(kobj);
-		return -EINVAL;
-	}
-
-	down_read(&sysfs_rename_sem);
-	error = sysfs_get_target_path(kobj, target_kobj, path);
-	up_read(&sysfs_rename_sem);
-	
-	kobject_put(kobj);
-	kobject_put(target_kobj);
 	return error;
-
 }
 
 static void *sysfs_follow_link(struct dentry *dentry, struct nameidata *nd)
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 502c949c402d..6a37f2386a8d 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -1,9 +1,40 @@
+struct sysfs_elem_dir {
+	struct kobject		* kobj;
+};
+
+struct sysfs_elem_symlink {
+	struct sysfs_dirent	* target_sd;
+};
+
+struct sysfs_elem_attr {
+	struct attribute	* attr;
+};
+
+struct sysfs_elem_bin_attr {
+	struct bin_attribute	* bin_attr;
+};
+
+/*
+ * As long as s_count reference is held, the sysfs_dirent itself is
+ * accessible.  Dereferencing s_elem or any other outer entity
+ * requires s_active reference.
+ */
 struct sysfs_dirent {
 	atomic_t		s_count;
-	struct list_head	s_sibling;
-	struct list_head	s_children;
-	void 			* s_element;
-	int			s_type;
+	atomic_t		s_active;
+	struct sysfs_dirent	* s_parent;
+	struct sysfs_dirent	* s_sibling;
+	struct sysfs_dirent	* s_children;
+	const char		* s_name;
+
+	union {
+		struct sysfs_elem_dir		dir;
+		struct sysfs_elem_symlink	symlink;
+		struct sysfs_elem_attr		attr;
+		struct sysfs_elem_bin_attr	bin_attr;
+	}			s_elem;
+
+	unsigned int		s_flags;
 	umode_t			s_mode;
 	ino_t			s_ino;
 	struct dentry		* s_dentry;
@@ -11,30 +42,60 @@ struct sysfs_dirent {
 	atomic_t		s_event;
 };
 
+#define SD_DEACTIVATED_BIAS	INT_MIN
+
+struct sysfs_addrm_cxt {
+	struct sysfs_dirent	*parent_sd;
+	struct inode		*parent_inode;
+	struct sysfs_dirent	*removed;
+	int			cnt;
+};
+
 extern struct vfsmount * sysfs_mount;
+extern struct sysfs_dirent sysfs_root;
 extern struct kmem_cache *sysfs_dir_cachep;
 
-extern void sysfs_delete_inode(struct inode *inode);
-extern struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent *);
-extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *));
+extern struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd);
+extern void sysfs_link_sibling(struct sysfs_dirent *sd);
+extern void sysfs_unlink_sibling(struct sysfs_dirent *sd);
+extern struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd);
+extern void sysfs_put_active(struct sysfs_dirent *sd);
+extern struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd);
+extern void sysfs_put_active_two(struct sysfs_dirent *sd);
+extern void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
+			      struct sysfs_dirent *parent_sd);
+extern void sysfs_add_one(struct sysfs_addrm_cxt *acxt,
+			  struct sysfs_dirent *sd);
+extern void sysfs_remove_one(struct sysfs_addrm_cxt *acxt,
+			     struct sysfs_dirent *sd);
+extern int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
 
-extern int sysfs_dirent_exist(struct sysfs_dirent *, const unsigned char *);
-extern int sysfs_make_dirent(struct sysfs_dirent *, struct dentry *, void *,
-				umode_t, int);
-
-extern int sysfs_add_file(struct dentry *, const struct attribute *, int);
-extern int sysfs_hash_and_remove(struct dentry * dir, const char * name);
+extern void sysfs_delete_inode(struct inode *inode);
+extern void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode);
+extern struct inode * sysfs_get_inode(struct sysfs_dirent *sd);
+extern void sysfs_instantiate(struct dentry *dentry, struct inode *inode);
+
+extern void release_sysfs_dirent(struct sysfs_dirent * sd);
+extern struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
+					      const unsigned char *name);
+extern struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
+					     const unsigned char *name);
+extern struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode,
+					     int type);
+
+extern int sysfs_add_file(struct sysfs_dirent *dir_sd,
+			  const struct attribute *attr, int type);
+extern int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name);
 extern struct sysfs_dirent *sysfs_find(struct sysfs_dirent *dir, const char * name);
 
-extern int sysfs_create_subdir(struct kobject *, const char *, struct dentry **);
-extern void sysfs_remove_subdir(struct dentry *);
+extern int sysfs_create_subdir(struct kobject *kobj, const char *name,
+			       struct sysfs_dirent **p_sd);
+extern void sysfs_remove_subdir(struct sysfs_dirent *sd);
 
-extern const unsigned char * sysfs_get_name(struct sysfs_dirent *sd);
-extern void sysfs_drop_dentry(struct sysfs_dirent *sd, struct dentry *parent);
 extern int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
 
-extern spinlock_t sysfs_lock;
-extern struct rw_semaphore sysfs_rename_sem;
+extern spinlock_t sysfs_assoc_lock;
+extern struct mutex sysfs_mutex;
 extern struct super_block * sysfs_sb;
 extern const struct file_operations sysfs_dir_operations;
 extern const struct file_operations sysfs_file_operations;
@@ -42,73 +103,9 @@ extern const struct file_operations bin_fops;
 extern const struct inode_operations sysfs_dir_inode_operations;
 extern const struct inode_operations sysfs_symlink_inode_operations;
 
-struct sysfs_symlink {
-	char * link_name;
-	struct kobject * target_kobj;
-};
-
-struct sysfs_buffer {
-	struct list_head		associates;
-	size_t				count;
-	loff_t				pos;
-	char				* page;
-	struct sysfs_ops		* ops;
-	struct semaphore		sem;
-	int				orphaned;
-	int				needs_read_fill;
-	int				event;
-};
-
-struct sysfs_buffer_collection {
-	struct list_head	associates;
-};
-
-static inline struct kobject * to_kobj(struct dentry * dentry)
-{
-	struct sysfs_dirent * sd = dentry->d_fsdata;
-	return ((struct kobject *) sd->s_element);
-}
-
-static inline struct attribute * to_attr(struct dentry * dentry)
+static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
 {
-	struct sysfs_dirent * sd = dentry->d_fsdata;
-	return ((struct attribute *) sd->s_element);
-}
-
-static inline struct bin_attribute * to_bin_attr(struct dentry * dentry)
-{
-	struct sysfs_dirent * sd = dentry->d_fsdata;
-	return ((struct bin_attribute *) sd->s_element);
-}
-
-static inline struct kobject *sysfs_get_kobject(struct dentry *dentry)
-{
-	struct kobject * kobj = NULL;
-
-	spin_lock(&dcache_lock);
-	if (!d_unhashed(dentry)) {
-		struct sysfs_dirent * sd = dentry->d_fsdata;
-		if (sd->s_type & SYSFS_KOBJ_LINK) {
-			struct sysfs_symlink * sl = sd->s_element;
-			kobj = kobject_get(sl->target_kobj);
-		} else
-			kobj = kobject_get(sd->s_element);
-	}
-	spin_unlock(&dcache_lock);
-
-	return kobj;
-}
-
-static inline void release_sysfs_dirent(struct sysfs_dirent * sd)
-{
-	if (sd->s_type & SYSFS_KOBJ_LINK) {
-		struct sysfs_symlink * sl = sd->s_element;
-		kfree(sl->link_name);
-		kobject_put(sl->target_kobj);
-		kfree(sl);
-	}
-	kfree(sd->s_iattr);
-	kmem_cache_free(sysfs_dir_cachep, sd);
+	return sd->s_flags & SYSFS_TYPE_MASK;
 }
 
 static inline struct sysfs_dirent * sysfs_get(struct sysfs_dirent * sd)
@@ -122,7 +119,7 @@ static inline struct sysfs_dirent * sysfs_get(struct sysfs_dirent * sd)
 
 static inline void sysfs_put(struct sysfs_dirent * sd)
 {
-	if (atomic_dec_and_test(&sd->s_count))
+	if (sd && atomic_dec_and_test(&sd->s_count))
 		release_sysfs_dirent(sd);
 }