From d05fdb0cec75415b2d9eb95748386e67414e49c3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Jun 2005 17:16:19 +0000 Subject: [PATCH] RPC: Fix a race with rpc_restart_call() If the task->tk_exit() wants to restart the RPC call after delaying then the current RPC code will clobber the timer by calling rpc_delete_timer() immediately after re-entering the loop in __rpc_execute(). Problem noticed by Oleg Nesterov Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 53 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index c06614d0e31d..cc298fa4b81d 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -554,6 +554,30 @@ __rpc_atrun(struct rpc_task *task) rpc_wake_up_task(task); } +/* + * Helper that calls task->tk_exit if it exists and then returns + * true if we should exit __rpc_execute. + */ +static inline int __rpc_do_exit(struct rpc_task *task) +{ + if (task->tk_exit != NULL) { + lock_kernel(); + task->tk_exit(task); + unlock_kernel(); + /* If tk_action is non-null, we should restart the call */ + if (task->tk_action != NULL) { + if (!RPC_ASSASSINATED(task)) { + /* Release RPC slot and buffer memory */ + xprt_release(task); + rpc_free(task); + return 0; + } + printk(KERN_ERR "RPC: dead task tried to walk away.\n"); + } + } + return 1; +} + /* * This is the RPC `scheduler' (or rather, the finite state machine). */ @@ -566,8 +590,7 @@ static int __rpc_execute(struct rpc_task *task) BUG_ON(RPC_IS_QUEUED(task)); - restarted: - while (1) { + for (;;) { /* * Garbage collection of pending timers... */ @@ -600,11 +623,12 @@ static int __rpc_execute(struct rpc_task *task) * by someone else. 
*/ if (!RPC_IS_QUEUED(task)) { - if (!task->tk_action) + if (task->tk_action != NULL) { + lock_kernel(); + task->tk_action(task); + unlock_kernel(); + } else if (__rpc_do_exit(task)) break; - lock_kernel(); - task->tk_action(task); - unlock_kernel(); } /* @@ -645,23 +669,6 @@ static int __rpc_execute(struct rpc_task *task) dprintk("RPC: %4d sync task resuming\n", task->tk_pid); } - if (task->tk_exit) { - lock_kernel(); - task->tk_exit(task); - unlock_kernel(); - /* If tk_action is non-null, the user wants us to restart */ - if (task->tk_action) { - if (!RPC_ASSASSINATED(task)) { - /* Release RPC slot and buffer memory */ - if (task->tk_rqstp) - xprt_release(task); - rpc_free(task); - goto restarted; - } - printk(KERN_ERR "RPC: dead task tries to walk away.\n"); - } - } - dprintk("RPC: %4d exit() = %d\n", task->tk_pid, task->tk_status); status = task->tk_status; -- cgit v1.2.3-59-g8ed1b From 334ccfd545bba9690515f2c5c167d5adb161989b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Jun 2005 17:16:19 +0000 Subject: [PATCH] RPC: Ensure XDR iovec length is initialized correctly in call_header Fix up call_header() so that it calls xdr_adjust_iovec(). Fix calculation of the scratch buffer length in xdr_init_encode(). 
Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 4 +++- net/sunrpc/svc.c | 1 + net/sunrpc/xdr.c | 18 +++++++++++++++--- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 02bc029d46fe..209aaf595695 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -957,7 +957,9 @@ call_header(struct rpc_task *task) *p++ = htonl(clnt->cl_prog); /* program number */ *p++ = htonl(clnt->cl_vers); /* program version */ *p++ = htonl(task->tk_msg.rpc_proc->p_proc); /* procedure */ - return rpcauth_marshcred(task, p); + p = rpcauth_marshcred(task, p); + req->rq_slen = xdr_adjust_iovec(&req->rq_svec[0], p); + return p; } /* diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index bb2d99f33315..a02d424a7409 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -281,6 +281,7 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp) rqstp->rq_res.len = 0; rqstp->rq_res.page_base = 0; rqstp->rq_res.page_len = 0; + rqstp->rq_res.buflen = PAGE_SIZE; rqstp->rq_res.tail[0].iov_len = 0; /* tcp needs a space for the record length... 
*/ if (rqstp->rq_prot == IPPROTO_TCP) diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 67b9f035ba86..f86d1baa6302 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -616,12 +616,24 @@ xdr_shift_buf(struct xdr_buf *buf, size_t len) void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p) { struct kvec *iov = buf->head; + int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len; + BUG_ON(scratch_len < 0); xdr->buf = buf; xdr->iov = iov; - xdr->end = (uint32_t *)((char *)iov->iov_base + iov->iov_len); - buf->len = iov->iov_len = (char *)p - (char *)iov->iov_base; - xdr->p = p; + xdr->p = (uint32_t *)((char *)iov->iov_base + iov->iov_len); + xdr->end = (uint32_t *)((char *)iov->iov_base + scratch_len); + BUG_ON(iov->iov_len > scratch_len); + + if (p != xdr->p && p != NULL) { + size_t len; + + BUG_ON(p < xdr->p || p > xdr->end); + len = (char *)p - (char *)xdr->p; + xdr->p = p; + buf->len += len; + iov->iov_len += len; + } } EXPORT_SYMBOL(xdr_init_encode); -- cgit v1.2.3-59-g8ed1b From 5b616f5d596c0b056129f8aeafbc08409b3cd050 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Jun 2005 17:16:20 +0000 Subject: [PATCH] RPC: Make rpc_create_client() destroy the transport on failure. This saves us a couple of lines of cleanup code for each call. 
Signed-off-by: Trond Myklebust --- fs/lockd/host.c | 4 +--- fs/lockd/mon.c | 5 ++--- fs/nfs/inode.c | 2 -- fs/nfs/mount_clnt.c | 4 +--- fs/nfsd/nfs4callback.c | 4 +--- net/sunrpc/clnt.c | 1 + net/sunrpc/pmap_clnt.c | 4 +--- net/sunrpc/sunrpc_syms.c | 1 - 8 files changed, 7 insertions(+), 18 deletions(-) diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 52707c5ad6ea..90a62f27914c 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -193,10 +193,8 @@ nlm_bind_host(struct nlm_host *host) /* Existing NLM servers accept AUTH_UNIX only */ clnt = rpc_create_client(xprt, host->h_name, &nlm_program, host->h_version, RPC_AUTH_UNIX); - if (IS_ERR(clnt)) { - xprt_destroy(xprt); + if (IS_ERR(clnt)) goto forgetit; - } clnt->cl_autobind = 1; /* turn on pmap queries */ xprt->nocong = 1; /* No congestion control for NLM */ xprt->resvport = 1; /* NLM requires a reserved port */ diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 6fc1bebeec1d..81b5e7778d70 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -120,15 +120,14 @@ nsm_create(void) &nsm_program, SM_VERSION, RPC_AUTH_NULL); if (IS_ERR(clnt)) - goto out_destroy; + goto out_err; clnt->cl_softrtry = 1; clnt->cl_chatty = 1; clnt->cl_oneshot = 1; xprt->resvport = 1; /* NSM requires a reserved port */ return clnt; -out_destroy: - xprt_destroy(xprt); +out_err: return clnt; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index f2317f3e29f9..ea784969fb85 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -383,7 +383,6 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) return clnt; out_fail: - xprt_destroy(xprt); return clnt; } @@ -1623,7 +1622,6 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, if (IS_ERR(clnt)) { up_write(&clp->cl_sem); printk(KERN_WARNING "NFS: cannot create RPC client.\n"); - xprt_destroy(xprt); err = PTR_ERR(clnt); goto out_fail; } diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 9d3ddad96d9e..0e82617f2de0 100644 --- 
a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -80,9 +80,7 @@ mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version, clnt = rpc_create_client(xprt, hostname, &mnt_program, version, RPC_AUTH_UNIX); - if (IS_ERR(clnt)) { - xprt_destroy(xprt); - } else { + if (!IS_ERR(clnt)) { clnt->cl_softrtry = 1; clnt->cl_chatty = 1; clnt->cl_oneshot = 1; diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 1a55dfcb74bc..634465e9cfc6 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -430,7 +430,7 @@ nfsd4_probe_callback(struct nfs4_client *clp) clnt = rpc_create_client(xprt, hostname, program, 1, RPC_AUTH_UNIX); if (IS_ERR(clnt)) { dprintk("NFSD: couldn't create callback client\n"); - goto out_xprt; + goto out_err; } clnt->cl_intr = 0; clnt->cl_softrtry = 1; @@ -465,8 +465,6 @@ out_rpciod: out_clnt: rpc_shutdown_client(clnt); goto out_err; -out_xprt: - xprt_destroy(xprt); out_err: dprintk("NFSD: warning: no callback path to client %.*s\n", (int)clp->cl_name.len, clp->cl_name.data); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 209aaf595695..99515d7727a6 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -178,6 +178,7 @@ out_no_path: kfree(clnt->cl_server); kfree(clnt); out_err: + xprt_destroy(xprt); return ERR_PTR(err); } diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index d0b1d2c34a4d..97c420ff1ee0 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c @@ -210,9 +210,7 @@ pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto) clnt = rpc_create_client(xprt, hostname, &pmap_program, RPC_PMAP_VERSION, RPC_AUTH_UNIX); - if (IS_ERR(clnt)) { - xprt_destroy(xprt); - } else { + if (!IS_ERR(clnt)) { clnt->cl_softrtry = 1; clnt->cl_chatty = 1; clnt->cl_oneshot = 1; diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index d4f26bf9e732..1b0ff7e0e869 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -61,7 +61,6 @@ EXPORT_SYMBOL(rpc_mkpipe); /* Client 
transport */ EXPORT_SYMBOL(xprt_create_proto); -EXPORT_SYMBOL(xprt_destroy); EXPORT_SYMBOL(xprt_set_timeout); EXPORT_SYMBOL(xprt_udp_slot_table_entries); EXPORT_SYMBOL(xprt_tcp_slot_table_entries); -- cgit v1.2.3-59-g8ed1b From 5ee0ed7d3ab620a764740fb018f469d45f561931 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Jun 2005 17:16:20 +0000 Subject: [PATCH] RPC: Make rpc_create_client() probe server for RPC program+version support Ensure that we don't create an RPC client without checking that the server does indeed support the RPC program + version that we are trying to set up. This enables us to immediately return an error to "mount" if it turns out that the server is only supporting NFSv2, when we requested NFSv3 or NFSv4. Signed-off-by: Trond Myklebust --- fs/lockd/host.c | 4 +-- fs/lockd/mon.c | 2 +- include/linux/sunrpc/clnt.h | 4 +++ net/sunrpc/clnt.c | 59 ++++++++++++++++++++++++++++++++++++++++++++- net/sunrpc/pmap_clnt.c | 2 +- 5 files changed, 66 insertions(+), 5 deletions(-) diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 90a62f27914c..82c77df81c5f 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -189,6 +189,8 @@ nlm_bind_host(struct nlm_host *host) goto forgetit; xprt_set_timeout(&xprt->timeout, 5, nlmsvc_timeout); + xprt->nocong = 1; /* No congestion control for NLM */ + xprt->resvport = 1; /* NLM requires a reserved port */ /* Existing NLM servers accept AUTH_UNIX only */ clnt = rpc_create_client(xprt, host->h_name, &nlm_program, @@ -196,8 +198,6 @@ nlm_bind_host(struct nlm_host *host) if (IS_ERR(clnt)) goto forgetit; clnt->cl_autobind = 1; /* turn on pmap queries */ - xprt->nocong = 1; /* No congestion control for NLM */ - xprt->resvport = 1; /* NLM requires a reserved port */ host->h_rpcclnt = clnt; } diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 81b5e7778d70..2d144abe84ad 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -115,6 +115,7 @@ nsm_create(void) xprt = xprt_create_proto(IPPROTO_UDP, &sin, NULL); if 
(IS_ERR(xprt)) return (struct rpc_clnt *)xprt; + xprt->resvport = 1; /* NSM requires a reserved port */ clnt = rpc_create_client(xprt, "localhost", &nsm_program, SM_VERSION, @@ -124,7 +125,6 @@ nsm_create(void) clnt->cl_softrtry = 1; clnt->cl_chatty = 1; clnt->cl_oneshot = 1; - xprt->resvport = 1; /* NSM requires a reserved port */ return clnt; out_err: diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 2709caf4d128..d25e80f77ff5 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -111,6 +111,9 @@ struct rpc_procinfo { struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname, struct rpc_program *info, u32 version, rpc_authflavor_t authflavor); +struct rpc_clnt *rpc_new_client(struct rpc_xprt *xprt, char *servname, + struct rpc_program *info, + u32 version, rpc_authflavor_t authflavor); struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); int rpc_shutdown_client(struct rpc_clnt *); int rpc_destroy_client(struct rpc_clnt *); @@ -129,6 +132,7 @@ void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset); void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset); void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int); size_t rpc_max_payload(struct rpc_clnt *); +int rpc_ping(struct rpc_clnt *clnt, int flags); static __inline__ int rpc_call(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 99515d7727a6..b36797ad8083 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -97,7 +97,7 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) * made to sleep too long. 
*/ struct rpc_clnt * -rpc_create_client(struct rpc_xprt *xprt, char *servname, +rpc_new_client(struct rpc_xprt *xprt, char *servname, struct rpc_program *program, u32 vers, rpc_authflavor_t flavor) { @@ -182,6 +182,36 @@ out_err: return ERR_PTR(err); } +/** + * rpc_create_client - Create an RPC client + * @xprt: pointer to xprt struct + * @servname: name of server + * @info: rpc_program + * @version: rpc_program version + * @authflavor: rpc_auth flavour to use + * + * Creates an RPC client structure, then pings the server in order to + * determine if it is up, and if it supports this program and version. + * + * This function should never be called by asynchronous tasks such as + * the portmapper. + */ +struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname, + struct rpc_program *info, u32 version, rpc_authflavor_t authflavor) +{ + struct rpc_clnt *clnt; + int err; + + clnt = rpc_new_client(xprt, servname, info, version, authflavor); + if (IS_ERR(clnt)) + return clnt; + err = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR); + if (err == 0) + return clnt; + rpc_shutdown_client(clnt); + return ERR_PTR(err); +} + /* * This function clones the RPC client structure. 
It allows us to share the * same transport while varying parameters such as the authentication @@ -1086,3 +1116,30 @@ out_overflow: printk(KERN_WARNING "RPC %s: server reply was truncated.\n", __FUNCTION__); goto out_retry; } + +static int rpcproc_encode_null(void *rqstp, u32 *data, void *obj) +{ + return 0; +} + +static int rpcproc_decode_null(void *rqstp, u32 *data, void *obj) +{ + return 0; +} + +static struct rpc_procinfo rpcproc_null = { + .p_encode = rpcproc_encode_null, + .p_decode = rpcproc_decode_null, +}; + +int rpc_ping(struct rpc_clnt *clnt, int flags) +{ + struct rpc_message msg = { + .rpc_proc = &rpcproc_null, + }; + int err; + msg.rpc_cred = authnull_ops.lookup_cred(NULL, NULL, 0); + err = rpc_call_sync(clnt, &msg, flags); + put_rpccred(msg.rpc_cred); + return err; +} diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index 97c420ff1ee0..df4d84c9020d 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c @@ -207,7 +207,7 @@ pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto) xprt->addr.sin_port = htons(RPC_PMAP_PORT); /* printk("pmap: create clnt\n"); */ - clnt = rpc_create_client(xprt, hostname, + clnt = rpc_new_client(xprt, hostname, &pmap_program, RPC_PMAP_VERSION, RPC_AUTH_UNIX); if (!IS_ERR(clnt)) { -- cgit v1.2.3-59-g8ed1b From 9085bbcb76421a90bea28f4d3d03fa9977319c49 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Jun 2005 17:16:20 +0000 Subject: [PATCH] NFS: Kill annoying mount version mismatch printks Ensure that we fix up the missing fields in the nfs_mount_data with sane defaults for older versions of mount, and return errors in the cases where we cannot. Convert a bunch of annoying warnings into dprintks() Return -EPROTONOSUPPORT rather than EIO if mount() tries to set NFSv3 without it actually being compiled in. 
Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 179 +++++++++++++++++++++++++++++++++------------------------ 1 file changed, 105 insertions(+), 74 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index ea784969fb85..32ddcf69e9ac 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -366,13 +366,15 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) xprt = xprt_create_proto(tcp ? IPPROTO_TCP : IPPROTO_UDP, &server->addr, &timeparms); if (IS_ERR(xprt)) { - printk(KERN_WARNING "NFS: cannot create RPC transport.\n"); + dprintk("%s: cannot create RPC transport. Error = %ld\n", + __FUNCTION__, PTR_ERR(xprt)); return (struct rpc_clnt *)xprt; } clnt = rpc_create_client(xprt, server->hostname, &nfs_program, server->rpc_ops->version, data->pseudoflavor); if (IS_ERR(clnt)) { - printk(KERN_WARNING "NFS: cannot create RPC client.\n"); + dprintk("%s: cannot create RPC client. Error = %ld\n", + __FUNCTION__, PTR_ERR(clnt)); goto out_fail; } @@ -426,21 +428,16 @@ nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent) /* Check NFS protocol revision and initialize RPC op vector * and file handle pool. 
*/ - if (server->flags & NFS_MOUNT_VER3) { #ifdef CONFIG_NFS_V3 + if (server->flags & NFS_MOUNT_VER3) { server->rpc_ops = &nfs_v3_clientops; server->caps |= NFS_CAP_READDIRPLUS; - if (data->version < 4) { - printk(KERN_NOTICE "NFS: NFSv3 not supported by mount program.\n"); - return -EIO; - } -#else - printk(KERN_NOTICE "NFS: NFSv3 not supported.\n"); - return -EIO; -#endif } else { server->rpc_ops = &nfs_v2_clientops; } +#else + server->rpc_ops = &nfs_v2_clientops; +#endif /* Fill in pseudoflavor for mount version < 5 */ if (!(data->flags & NFS_MOUNT_SECFLAVOUR)) @@ -1384,74 +1381,94 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) { int error; - struct nfs_server *server; + struct nfs_server *server = NULL; struct super_block *s; struct nfs_fh *root; struct nfs_mount_data *data = raw_data; - if (!data) { - printk("nfs_read_super: missing data argument\n"); - return ERR_PTR(-EINVAL); + s = ERR_PTR(-EINVAL); + if (data == NULL) { + dprintk("%s: missing data argument\n", __FUNCTION__); + goto out_err; + } + if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) { + dprintk("%s: bad mount version\n", __FUNCTION__); + goto out_err; } + switch (data->version) { + case 1: + data->namlen = 0; + case 2: + data->bsize = 0; + case 3: + if (data->flags & NFS_MOUNT_VER3) { + dprintk("%s: mount structure version %d does not support NFSv3\n", + __FUNCTION__, + data->version); + goto out_err; + } + data->root.size = NFS2_FHSIZE; + memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); + case 4: + if (data->flags & NFS_MOUNT_SECFLAVOUR) { + dprintk("%s: mount structure version %d does not support strong security\n", + __FUNCTION__, + data->version); + goto out_err; + } + case 5: + memset(data->context, 0, sizeof(data->context)); + } +#ifndef CONFIG_NFS_V3 + /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */ + s = ERR_PTR(-EPROTONOSUPPORT); + if (data->flags & NFS_MOUNT_VER3) { + 
dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__); + goto out_err; + } +#endif /* CONFIG_NFS_V3 */ + s = ERR_PTR(-ENOMEM); server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL); if (!server) - return ERR_PTR(-ENOMEM); + goto out_err; memset(server, 0, sizeof(struct nfs_server)); /* Zero out the NFS state stuff */ init_nfsv4_state(server); - if (data->version != NFS_MOUNT_VERSION) { - printk("nfs warning: mount version %s than kernel\n", - data->version < NFS_MOUNT_VERSION ? "older" : "newer"); - if (data->version < 2) - data->namlen = 0; - if (data->version < 3) - data->bsize = 0; - if (data->version < 4) { - data->flags &= ~NFS_MOUNT_VER3; - data->root.size = NFS2_FHSIZE; - memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); - } - if (data->version < 5) - data->flags &= ~NFS_MOUNT_SECFLAVOUR; - } - root = &server->fh; if (data->flags & NFS_MOUNT_VER3) root->size = data->root.size; else root->size = NFS2_FHSIZE; + s = ERR_PTR(-EINVAL); if (root->size > sizeof(root->data)) { - printk("nfs_get_sb: invalid root filehandle\n"); - kfree(server); - return ERR_PTR(-EINVAL); + dprintk("%s: invalid root filehandle\n", __FUNCTION__); + goto out_err; } memcpy(root->data, data->root.data, root->size); /* We now require that the mount process passes the remote address */ memcpy(&server->addr, &data->addr, sizeof(server->addr)); if (server->addr.sin_addr.s_addr == INADDR_ANY) { - printk("NFS: mount program didn't pass remote address!\n"); - kfree(server); - return ERR_PTR(-EINVAL); + dprintk("%s: mount program didn't pass remote address!\n", + __FUNCTION__); + goto out_err; } - s = sget(fs_type, nfs_compare_super, nfs_set_super, server); - - if (IS_ERR(s) || s->s_root) { - kfree(server); - return s; + /* Fire up rpciod if not yet running */ + s = ERR_PTR(rpciod_up()); + if (IS_ERR(s)) { + dprintk("%s: couldn't start rpciod! 
Error = %ld\n", + __FUNCTION__, PTR_ERR(s)); + goto out_err; } - s->s_flags = flags; + s = sget(fs_type, nfs_compare_super, nfs_set_super, server); + if (IS_ERR(s) || s->s_root) + goto out_rpciod_down; - /* Fire up rpciod if not yet running */ - if (rpciod_up() != 0) { - printk(KERN_WARNING "NFS: couldn't start rpciod!\n"); - kfree(server); - return ERR_PTR(-EIO); - } + s->s_flags = flags; error = nfs_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0); if (error) { @@ -1461,6 +1478,11 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type, } s->s_flags |= MS_ACTIVE; return s; +out_rpciod_down: + rpciod_down(); +out_err: + kfree(server); + return s; } static void nfs_kill_super(struct super_block *s) @@ -1593,15 +1615,19 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, clp = nfs4_get_client(&server->addr.sin_addr); if (!clp) { - printk(KERN_WARNING "NFS: failed to create NFS4 client.\n"); + dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__); return -EIO; } /* Now create transport and client */ authflavour = RPC_AUTH_UNIX; if (data->auth_flavourlen != 0) { - if (data->auth_flavourlen > 1) - printk(KERN_INFO "NFS: cannot yet deal with multiple auth flavours.\n"); + if (data->auth_flavourlen != 1) { + dprintk("%s: Invalid number of RPC auth flavours %d.\n", + __FUNCTION__, data->auth_flavourlen); + err = -EINVAL; + goto out_fail; + } if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) { err = -EFAULT; goto out_fail; @@ -1613,16 +1639,18 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, xprt = xprt_create_proto(proto, &server->addr, &timeparms); if (IS_ERR(xprt)) { up_write(&clp->cl_sem); - printk(KERN_WARNING "NFS: cannot create RPC transport.\n"); err = PTR_ERR(xprt); + dprintk("%s: cannot create RPC transport. 
Error = %d\n", + __FUNCTION__, err); goto out_fail; } clnt = rpc_create_client(xprt, server->hostname, &nfs_program, server->rpc_ops->version, authflavour); if (IS_ERR(clnt)) { up_write(&clp->cl_sem); - printk(KERN_WARNING "NFS: cannot create RPC client.\n"); err = PTR_ERR(clnt); + dprintk("%s: cannot create RPC client. Error = %d\n", + __FUNCTION__, err); goto out_fail; } clnt->cl_intr = 1; @@ -1654,20 +1682,22 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, clp = NULL; if (IS_ERR(clnt)) { - printk(KERN_WARNING "NFS: cannot create RPC client.\n"); - return PTR_ERR(clnt); + err = PTR_ERR(clnt); + dprintk("%s: cannot create RPC client. Error = %d\n", + __FUNCTION__, err); + return err; } server->client = clnt; if (server->nfs4_state->cl_idmap == NULL) { - printk(KERN_WARNING "NFS: failed to create idmapper.\n"); + dprintk("%s: failed to create idmapper.\n", __FUNCTION__); return -ENOMEM; } if (clnt->cl_auth->au_flavor != authflavour) { if (rpcauth_create(authflavour, clnt) == NULL) { - printk(KERN_WARNING "NFS: couldn't create credcache!\n"); + dprintk("%s: couldn't create credcache!\n", __FUNCTION__); return -ENOMEM; } } @@ -1728,8 +1758,12 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, struct nfs4_mount_data *data = raw_data; void *p; - if (!data) { - printk("nfs_read_super: missing data argument\n"); + if (data == NULL) { + dprintk("%s: missing data argument\n", __FUNCTION__); + return ERR_PTR(-EINVAL); + } + if (data->version <= 0 || data->version > NFS4_MOUNT_VERSION) { + dprintk("%s: bad mount version\n", __FUNCTION__); return ERR_PTR(-EINVAL); } @@ -1740,11 +1774,6 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, /* Zero out the NFS state stuff */ init_nfsv4_state(server); - if (data->version != NFS4_MOUNT_VERSION) { - printk("nfs warning: mount version %s than kernel\n", - data->version < NFS4_MOUNT_VERSION ? 
"older" : "newer"); - } - p = nfs_copy_user_string(NULL, &data->hostname, 256); if (IS_ERR(p)) goto out_err; @@ -1771,11 +1800,20 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, } if (server->addr.sin_family != AF_INET || server->addr.sin_addr.s_addr == INADDR_ANY) { - printk("NFS: mount program didn't pass remote IP address!\n"); + dprintk("%s: mount program didn't pass remote IP address!\n", + __FUNCTION__); s = ERR_PTR(-EINVAL); goto out_free; } + /* Fire up rpciod if not yet running */ + s = ERR_PTR(rpciod_up()); + if (IS_ERR(s)) { + dprintk("%s: couldn't start rpciod! Error = %ld\n", + __FUNCTION__, PTR_ERR(s)); + goto out_free; + } + s = sget(fs_type, nfs4_compare_super, nfs_set_super, server); if (IS_ERR(s) || s->s_root) @@ -1783,13 +1821,6 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, s->s_flags = flags; - /* Fire up rpciod if not yet running */ - if (rpciod_up() != 0) { - printk(KERN_WARNING "NFS: couldn't start rpciod!\n"); - s = ERR_PTR(-EIO); - goto out_free; - } - error = nfs4_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0); if (error) { up_write(&s->s_umount); -- cgit v1.2.3-59-g8ed1b From 4ce79717ce32a9f88c1ddce4b9658556cb59d37a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Jun 2005 17:16:21 +0000 Subject: [PATCH] NFS: Header file cleanup... - Move NFSv4 state definitions into a private header file. 
- Clean up gunk in nfs_fs.h Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 1 + fs/nfs/callback_proc.c | 1 + fs/nfs/callback_xdr.c | 1 + fs/nfs/delegation.c | 1 + fs/nfs/dir.c | 1 + fs/nfs/idmap.c | 1 + fs/nfs/inode.c | 1 + fs/nfs/nfs4_fs.h | 250 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4proc.c | 5 +- fs/nfs/nfs4renewd.c | 1 + fs/nfs/nfs4state.c | 12 +-- fs/nfs/nfs4xdr.c | 8 +- include/linux/nfs_fs.h | 241 ----------------------------------------------- 13 files changed, 264 insertions(+), 260 deletions(-) create mode 100644 fs/nfs/nfs4_fs.h diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 560d6175dd58..f2ca782aba33 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -14,6 +14,7 @@ #include #include #include +#include "nfs4_fs.h" #include "callback.h" #define NFSDBG_FACILITY NFSDBG_CALLBACK diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index ece27e42b93b..65f1e19e4d19 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -8,6 +8,7 @@ #include #include #include +#include "nfs4_fs.h" #include "callback.h" #include "delegation.h" diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index d271df9df2b2..c99677ec58f8 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -10,6 +10,7 @@ #include #include #include +#include "nfs4_fs.h" #include "callback.h" #define CB_OP_TAGLEN_MAXSZ (512) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 5b9c60f97791..d7f7eb669d03 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -16,6 +16,7 @@ #include #include +#include "nfs4_fs.h" #include "delegation.h" static struct nfs_delegation *nfs_alloc_delegation(void) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index ff6155f5e8d9..9ccb15e86967 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -32,6 +32,7 @@ #include #include +#include "nfs4_fs.h" #include "delegation.h" #define NFS_PARANOIA 1 diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 87f4f9aeac86..ffb8df91dc34 100644 --- 
a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -50,6 +50,7 @@ #include #include +#include "nfs4_fs.h" #define IDMAP_HASH_SZ 128 diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 32ddcf69e9ac..c80a81ff59c6 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -39,6 +39,7 @@ #include #include +#include "nfs4_fs.h" #include "delegation.h" #define NFSDBG_FACILITY NFSDBG_VFS diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h new file mode 100644 index 000000000000..85cf3bd36921 --- /dev/null +++ b/fs/nfs/nfs4_fs.h @@ -0,0 +1,250 @@ +/* + * linux/fs/nfs/nfs4_fs.h + * + * Copyright (C) 2005 Trond Myklebust + * + * NFSv4-specific filesystem definitions and declarations + */ + +#ifndef __LINUX_FS_NFS_NFS4_FS_H +#define __LINUX_FS_NFS_NFS4_FS_H + +#ifdef CONFIG_NFS_V4 + +struct idmap; + +/* + * In a seqid-mutating op, this macro controls which error return + * values trigger incrementation of the seqid. + * + * from rfc 3010: + * The client MUST monotonically increment the sequence number for the + * CLOSE, LOCK, LOCKU, OPEN, OPEN_CONFIRM, and OPEN_DOWNGRADE + * operations. This is true even in the event that the previous + * operation that used the sequence number received an error. The only + * exception to this rule is if the previous operation received one of + * the following errors: NFSERR_STALE_CLIENTID, NFSERR_STALE_STATEID, + * NFSERR_BAD_STATEID, NFSERR_BAD_SEQID, NFSERR_BADXDR, + * NFSERR_RESOURCE, NFSERR_NOFILEHANDLE. + * + */ +#define seqid_mutating_err(err) \ +(((err) != NFSERR_STALE_CLIENTID) && \ + ((err) != NFSERR_STALE_STATEID) && \ + ((err) != NFSERR_BAD_STATEID) && \ + ((err) != NFSERR_BAD_SEQID) && \ + ((err) != NFSERR_BAD_XDR) && \ + ((err) != NFSERR_RESOURCE) && \ + ((err) != NFSERR_NOFILEHANDLE)) + +enum nfs4_client_state { + NFS4CLNT_OK = 0, +}; + +/* + * The nfs4_client identifies our client state to the server. 
+ */ +struct nfs4_client { + struct list_head cl_servers; /* Global list of servers */ + struct in_addr cl_addr; /* Server identifier */ + u64 cl_clientid; /* constant */ + nfs4_verifier cl_confirm; + unsigned long cl_state; + + u32 cl_lockowner_id; + + /* + * The following rwsem ensures exclusive access to the server + * while we recover the state following a lease expiration. + */ + struct rw_semaphore cl_sem; + + struct list_head cl_delegations; + struct list_head cl_state_owners; + struct list_head cl_unused; + int cl_nunused; + spinlock_t cl_lock; + atomic_t cl_count; + + struct rpc_clnt * cl_rpcclient; + struct rpc_cred * cl_cred; + + struct list_head cl_superblocks; /* List of nfs_server structs */ + + unsigned long cl_lease_time; + unsigned long cl_last_renewal; + struct work_struct cl_renewd; + struct work_struct cl_recoverd; + + wait_queue_head_t cl_waitq; + struct rpc_wait_queue cl_rpcwaitq; + + /* used for the setclientid verifier */ + struct timespec cl_boot_time; + + /* idmapper */ + struct idmap * cl_idmap; + + /* Our own IP address, as a null-terminated string. + * This is used to generate the clientid, and the callback address. + */ + char cl_ipaddr[16]; + unsigned char cl_id_uniquifier; +}; + +/* + * NFS4 state_owners and lock_owners are simply labels for ordered + * sequences of RPC calls. Their sole purpose is to provide once-only + * semantics by allowing the server to identify replayed requests. + * + * The ->so_sema is held during all state_owner seqid-mutating operations: + * OPEN, OPEN_DOWNGRADE, and CLOSE. Its purpose is to properly serialize + * so_seqid. 
+ */ +struct nfs4_state_owner { + struct list_head so_list; /* per-clientid list of state_owners */ + struct nfs4_client *so_client; + u32 so_id; /* 32-bit identifier, unique */ + struct semaphore so_sema; + u32 so_seqid; /* protected by so_sema */ + atomic_t so_count; + + struct rpc_cred *so_cred; /* Associated cred */ + struct list_head so_states; + struct list_head so_delegations; +}; + +/* + * struct nfs4_state maintains the client-side state for a given + * (state_owner,inode) tuple (OPEN) or state_owner (LOCK). + * + * OPEN: + * In order to know when to OPEN_DOWNGRADE or CLOSE the state on the server, + * we need to know how many files are open for reading or writing on a + * given inode. This information too is stored here. + * + * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN) + */ + +struct nfs4_lock_state { + struct list_head ls_locks; /* Other lock stateids */ + fl_owner_t ls_owner; /* POSIX lock owner */ +#define NFS_LOCK_INITIALIZED 1 + int ls_flags; + u32 ls_seqid; + u32 ls_id; + nfs4_stateid ls_stateid; + atomic_t ls_count; +}; + +/* bits for nfs4_state->flags */ +enum { + LK_STATE_IN_USE, + NFS_DELEGATED_STATE, +}; + +struct nfs4_state { + struct list_head open_states; /* List of states for the same state_owner */ + struct list_head inode_states; /* List of states for the same inode */ + struct list_head lock_states; /* List of subservient lock stateids */ + + struct nfs4_state_owner *owner; /* Pointer to the open owner */ + struct inode *inode; /* Pointer to the inode */ + + unsigned long flags; /* Do we hold any locks? 
*/ + struct semaphore lock_sema; /* Serializes file locking operations */ + rwlock_t state_lock; /* Protects the lock_states list */ + + nfs4_stateid stateid; + + unsigned int nreaders; + unsigned int nwriters; + int state; /* State on the server (R,W, or RW) */ + atomic_t count; +}; + + +struct nfs4_exception { + long timeout; + int retry; +}; + +struct nfs4_state_recovery_ops { + int (*recover_open)(struct nfs4_state_owner *, struct nfs4_state *); + int (*recover_lock)(struct nfs4_state *, struct file_lock *); +}; + +extern struct dentry_operations nfs4_dentry_operations; +extern struct inode_operations nfs4_dir_inode_operations; + +/* nfs4proc.c */ +extern int nfs4_map_errors(int err); +extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short); +extern int nfs4_proc_setclientid_confirm(struct nfs4_client *); +extern int nfs4_proc_async_renew(struct nfs4_client *); +extern int nfs4_proc_renew(struct nfs4_client *); +extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode); +extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); +extern int nfs4_open_revalidate(struct inode *, struct dentry *, int); + +extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops; +extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops; + +extern const u32 nfs4_fattr_bitmap[2]; +extern const u32 nfs4_statfs_bitmap[2]; +extern const u32 nfs4_pathconf_bitmap[2]; +extern const u32 nfs4_fsinfo_bitmap[2]; + +/* nfs4renewd.c */ +extern void nfs4_schedule_state_renewal(struct nfs4_client *); +extern void nfs4_renewd_prepare_shutdown(struct nfs_server *); +extern void nfs4_kill_renewd(struct nfs4_client *); +extern void nfs4_renew_state(void *); + +/* nfs4state.c */ +extern void init_nfsv4_state(struct nfs_server *); +extern void destroy_nfsv4_state(struct nfs_server *); +extern struct nfs4_client *nfs4_get_client(struct in_addr *); +extern void nfs4_put_client(struct nfs4_client *clp); 
+extern int nfs4_init_client(struct nfs4_client *clp); +extern struct nfs4_client *nfs4_find_client(struct in_addr *); +extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *); + +extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); +extern void nfs4_put_state_owner(struct nfs4_state_owner *); +extern void nfs4_drop_state_owner(struct nfs4_state_owner *); +extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); +extern void nfs4_put_open_state(struct nfs4_state *); +extern void nfs4_close_state(struct nfs4_state *, mode_t); +extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode); +extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp); +extern void nfs4_schedule_state_recovery(struct nfs4_client *); +extern struct nfs4_lock_state *nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t); +extern struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t); +extern void nfs4_put_lock_state(struct nfs4_lock_state *state); +extern void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *ls); +extern void nfs4_notify_setlk(struct nfs4_state *, struct file_lock *, struct nfs4_lock_state *); +extern void nfs4_notify_unlck(struct nfs4_state *, struct file_lock *, struct nfs4_lock_state *); +extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); + +extern const nfs4_stateid zero_stateid; + +/* nfs4xdr.c */ +extern uint32_t *nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus); +extern struct rpc_procinfo nfs4_procedures[]; + +struct nfs4_mount_data; + +/* callback_xdr.c */ +extern struct svc_version nfs4_callback_version1; + +#else + +#define init_nfsv4_state(server) do { } while (0) +#define destroy_nfsv4_state(server) do { } while (0) +#define nfs4_put_state_owner(inode, owner) do { } while (0) +#define nfs4_put_open_state(state) do { } while (0) +#define 
nfs4_close_state(a, b) do { } while (0) + +#endif /* CONFIG_NFS_V4 */ +#endif /* __LINUX_FS_NFS_NFS4_FS.H */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1d5cb3e80c3e..a69c02b206c1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -48,6 +48,7 @@ #include #include +#include "nfs4_fs.h" #include "delegation.h" #define NFSDBG_FACILITY NFSDBG_PROC @@ -62,8 +63,6 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); extern struct rpc_procinfo nfs4_procedures[]; -extern nfs4_stateid zero_stateid; - /* Prevent leaks of NFSv4 errors into userland */ int nfs4_map_errors(int err) { @@ -104,7 +103,7 @@ const u32 nfs4_statfs_bitmap[2] = { | FATTR4_WORD1_SPACE_TOTAL }; -u32 nfs4_pathconf_bitmap[2] = { +const u32 nfs4_pathconf_bitmap[2] = { FATTR4_WORD0_MAXLINK | FATTR4_WORD0_MAXNAME, 0 diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 667e06f1c647..a3001628ad32 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -53,6 +53,7 @@ #include #include #include +#include "nfs4_fs.h" #define NFSDBG_FACILITY NFSDBG_PROC diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 231cebce3c87..17b187f2d776 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -46,24 +46,18 @@ #include #include +#include "nfs4_fs.h" #include "callback.h" #include "delegation.h" #define OPENOWNER_POOL_SIZE 8 -static DEFINE_SPINLOCK(state_spinlock); - -nfs4_stateid zero_stateid; - -#if 0 -nfs4_stateid one_stateid = - { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; -#endif +const nfs4_stateid zero_stateid; +static DEFINE_SPINLOCK(state_spinlock); static LIST_HEAD(nfs4_clientid_list); static void nfs4_recover_state(void *); -extern void nfs4_renew_state(void *); void init_nfsv4_state(struct nfs_server *server) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 5f4de05763c9..e86406eff0eb 100644 --- 
a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -51,6 +51,7 @@ #include #include #include +#include "nfs4_fs.h" #define NFSDBG_FACILITY NFSDBG_XDR @@ -660,8 +661,6 @@ static int encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1 static int encode_getfattr(struct xdr_stream *xdr, const u32* bitmask) { - extern u32 nfs4_fattr_bitmap[]; - return encode_getattr_two(xdr, bitmask[0] & nfs4_fattr_bitmap[0], bitmask[1] & nfs4_fattr_bitmap[1]); @@ -669,8 +668,6 @@ static int encode_getfattr(struct xdr_stream *xdr, const u32* bitmask) static int encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask) { - extern u32 nfs4_fsinfo_bitmap[]; - return encode_getattr_two(xdr, bitmask[0] & nfs4_fsinfo_bitmap[0], bitmask[1] & nfs4_fsinfo_bitmap[1]); } @@ -969,7 +966,6 @@ static int encode_putrootfh(struct xdr_stream *xdr) static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx) { - extern nfs4_stateid zero_stateid; nfs4_stateid stateid; uint32_t *p; @@ -1697,7 +1693,6 @@ static int nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, uint32_t *p, struct nfs4_fs */ static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, uint32_t *p, const struct nfs4_pathconf_arg *args) { - extern u32 nfs4_pathconf_bitmap[2]; struct xdr_stream xdr; struct compound_hdr hdr = { .nops = 2, @@ -1718,7 +1713,6 @@ static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, uint32_t *p, const struct */ static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, uint32_t *p, const struct nfs4_statfs_arg *args) { - extern u32 nfs4_statfs_bitmap[]; struct xdr_stream xdr; struct compound_hdr hdr = { .nops = 2, diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index dbac7f363e5d..fb33e7655cfa 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -15,7 +15,6 @@ #include #include #include -#include #include @@ -29,7 +28,6 @@ #include #include #include -#include #include /* @@ -43,13 +41,6 @@ #define NFS_MAX_FILE_IO_BUFFER_SIZE 32768 #define 
NFS_DEF_FILE_IO_BUFFER_SIZE 4096 -/* - * The upper limit on timeouts for the exponential backoff algorithm. - */ -#define NFS_WRITEBACK_DELAY (5*HZ) -#define NFS_WRITEBACK_LOCKDELAY (60*HZ) -#define NFS_COMMIT_DELAY (5*HZ) - /* * superblock magic number for NFS */ @@ -60,9 +51,6 @@ */ #define NFS_RPC_SWAPFLAGS (RPC_TASK_SWAPPER|RPC_TASK_ROOTCREDS) -#define NFS_RW_SYNC 0x0001 /* O_SYNC handling */ -#define NFS_RW_SWAP 0x0002 /* This is a swap request */ - /* * When flushing a cluster of dirty pages, there can be different * strategies: @@ -434,11 +422,6 @@ static inline void nfs_writedata_free(struct nfs_write_data *p) mempool_free(p, nfs_wdata_mempool); } -/* Hack for future NFS swap support */ -#ifndef IS_SWAPFILE -# define IS_SWAPFILE(inode) (0) -#endif - /* * linux/fs/nfs/read.c */ @@ -515,230 +498,6 @@ extern void * nfs_root_data(void); #define NFS_JUKEBOX_RETRY_TIME (5 * HZ) -#ifdef CONFIG_NFS_V4 - -struct idmap; - -/* - * In a seqid-mutating op, this macro controls which error return - * values trigger incrementation of the seqid. - * - * from rfc 3010: - * The client MUST monotonically increment the sequence number for the - * CLOSE, LOCK, LOCKU, OPEN, OPEN_CONFIRM, and OPEN_DOWNGRADE - * operations. This is true even in the event that the previous - * operation that used the sequence number received an error. The only - * exception to this rule is if the previous operation received one of - * the following errors: NFSERR_STALE_CLIENTID, NFSERR_STALE_STATEID, - * NFSERR_BAD_STATEID, NFSERR_BAD_SEQID, NFSERR_BADXDR, - * NFSERR_RESOURCE, NFSERR_NOFILEHANDLE. 
- * - */ -#define seqid_mutating_err(err) \ -(((err) != NFSERR_STALE_CLIENTID) && \ - ((err) != NFSERR_STALE_STATEID) && \ - ((err) != NFSERR_BAD_STATEID) && \ - ((err) != NFSERR_BAD_SEQID) && \ - ((err) != NFSERR_BAD_XDR) && \ - ((err) != NFSERR_RESOURCE) && \ - ((err) != NFSERR_NOFILEHANDLE)) - -enum nfs4_client_state { - NFS4CLNT_OK = 0, -}; - -/* - * The nfs4_client identifies our client state to the server. - */ -struct nfs4_client { - struct list_head cl_servers; /* Global list of servers */ - struct in_addr cl_addr; /* Server identifier */ - u64 cl_clientid; /* constant */ - nfs4_verifier cl_confirm; - unsigned long cl_state; - - u32 cl_lockowner_id; - - /* - * The following rwsem ensures exclusive access to the server - * while we recover the state following a lease expiration. - */ - struct rw_semaphore cl_sem; - - struct list_head cl_delegations; - struct list_head cl_state_owners; - struct list_head cl_unused; - int cl_nunused; - spinlock_t cl_lock; - atomic_t cl_count; - - struct rpc_clnt * cl_rpcclient; - struct rpc_cred * cl_cred; - - struct list_head cl_superblocks; /* List of nfs_server structs */ - - unsigned long cl_lease_time; - unsigned long cl_last_renewal; - struct work_struct cl_renewd; - struct work_struct cl_recoverd; - - wait_queue_head_t cl_waitq; - struct rpc_wait_queue cl_rpcwaitq; - - /* used for the setclientid verifier */ - struct timespec cl_boot_time; - - /* idmapper */ - struct idmap * cl_idmap; - - /* Our own IP address, as a null-terminated string. - * This is used to generate the clientid, and the callback address. - */ - char cl_ipaddr[16]; - unsigned char cl_id_uniquifier; -}; - -/* - * NFS4 state_owners and lock_owners are simply labels for ordered - * sequences of RPC calls. Their sole purpose is to provide once-only - * semantics by allowing the server to identify replayed requests. - * - * The ->so_sema is held during all state_owner seqid-mutating operations: - * OPEN, OPEN_DOWNGRADE, and CLOSE. 
Its purpose is to properly serialize - * so_seqid. - */ -struct nfs4_state_owner { - struct list_head so_list; /* per-clientid list of state_owners */ - struct nfs4_client *so_client; - u32 so_id; /* 32-bit identifier, unique */ - struct semaphore so_sema; - u32 so_seqid; /* protected by so_sema */ - atomic_t so_count; - - struct rpc_cred *so_cred; /* Associated cred */ - struct list_head so_states; - struct list_head so_delegations; -}; - -/* - * struct nfs4_state maintains the client-side state for a given - * (state_owner,inode) tuple (OPEN) or state_owner (LOCK). - * - * OPEN: - * In order to know when to OPEN_DOWNGRADE or CLOSE the state on the server, - * we need to know how many files are open for reading or writing on a - * given inode. This information too is stored here. - * - * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN) - */ - -struct nfs4_lock_state { - struct list_head ls_locks; /* Other lock stateids */ - fl_owner_t ls_owner; /* POSIX lock owner */ -#define NFS_LOCK_INITIALIZED 1 - int ls_flags; - u32 ls_seqid; - u32 ls_id; - nfs4_stateid ls_stateid; - atomic_t ls_count; -}; - -/* bits for nfs4_state->flags */ -enum { - LK_STATE_IN_USE, - NFS_DELEGATED_STATE, -}; - -struct nfs4_state { - struct list_head open_states; /* List of states for the same state_owner */ - struct list_head inode_states; /* List of states for the same inode */ - struct list_head lock_states; /* List of subservient lock stateids */ - - struct nfs4_state_owner *owner; /* Pointer to the open owner */ - struct inode *inode; /* Pointer to the inode */ - - unsigned long flags; /* Do we hold any locks? 
*/ - struct semaphore lock_sema; /* Serializes file locking operations */ - rwlock_t state_lock; /* Protects the lock_states list */ - - nfs4_stateid stateid; - - unsigned int nreaders; - unsigned int nwriters; - int state; /* State on the server (R,W, or RW) */ - atomic_t count; -}; - - -struct nfs4_exception { - long timeout; - int retry; -}; - -struct nfs4_state_recovery_ops { - int (*recover_open)(struct nfs4_state_owner *, struct nfs4_state *); - int (*recover_lock)(struct nfs4_state *, struct file_lock *); -}; - -extern struct dentry_operations nfs4_dentry_operations; -extern struct inode_operations nfs4_dir_inode_operations; - -/* nfs4proc.c */ -extern int nfs4_map_errors(int err); -extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short); -extern int nfs4_proc_setclientid_confirm(struct nfs4_client *); -extern int nfs4_proc_async_renew(struct nfs4_client *); -extern int nfs4_proc_renew(struct nfs4_client *); -extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode); -extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); -extern int nfs4_open_revalidate(struct inode *, struct dentry *, int); - -extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops; -extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops; - -/* nfs4renewd.c */ -extern void nfs4_schedule_state_renewal(struct nfs4_client *); -extern void nfs4_renewd_prepare_shutdown(struct nfs_server *); -extern void nfs4_kill_renewd(struct nfs4_client *); - -/* nfs4state.c */ -extern void init_nfsv4_state(struct nfs_server *); -extern void destroy_nfsv4_state(struct nfs_server *); -extern struct nfs4_client *nfs4_get_client(struct in_addr *); -extern void nfs4_put_client(struct nfs4_client *clp); -extern int nfs4_init_client(struct nfs4_client *clp); -extern struct nfs4_client *nfs4_find_client(struct in_addr *); -extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *); - -extern struct 
nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); -extern void nfs4_put_state_owner(struct nfs4_state_owner *); -extern void nfs4_drop_state_owner(struct nfs4_state_owner *); -extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); -extern void nfs4_put_open_state(struct nfs4_state *); -extern void nfs4_close_state(struct nfs4_state *, mode_t); -extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode); -extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp); -extern void nfs4_schedule_state_recovery(struct nfs4_client *); -extern struct nfs4_lock_state *nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t); -extern struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t); -extern void nfs4_put_lock_state(struct nfs4_lock_state *state); -extern void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *ls); -extern void nfs4_notify_setlk(struct nfs4_state *, struct file_lock *, struct nfs4_lock_state *); -extern void nfs4_notify_unlck(struct nfs4_state *, struct file_lock *, struct nfs4_lock_state *); -extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); - - - -struct nfs4_mount_data; -#else -#define init_nfsv4_state(server) do { } while (0) -#define destroy_nfsv4_state(server) do { } while (0) -#define nfs4_put_state_owner(inode, owner) do { } while (0) -#define nfs4_put_open_state(state) do { } while (0) -#define nfs4_close_state(a, b) do { } while (0) -#define nfs4_renewd_prepare_shutdown(server) do { } while (0) -#endif - #endif /* __KERNEL__ */ /* -- cgit v1.2.3-59-g8ed1b From a656db998785324a818005bcf71bae6dcbbb3cf5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Jun 2005 17:16:21 +0000 Subject: [PATCH] NFS: Remove unused NFS inode field readdir_timestamp. 
Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 8 +++----- include/linux/nfs_fs.h | 1 - 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 9ccb15e86967..dffa21abd3ea 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -165,12 +165,10 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; /* Ensure consistent page alignment of the data. * Note: assumes we have exclusive access to this mapping either - * throught inode->i_sem or some other mechanism. + * through inode->i_sem or some other mechanism. */ - if (page->index == 0) { - invalidate_inode_pages(inode->i_mapping); - NFS_I(inode)->readdir_timestamp = timestamp; - } + if (page->index == 0) + invalidate_inode_pages2_range(inode->i_mapping, PAGE_CACHE_SIZE, -1); unlock_page(page); return 0; error: diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index fb33e7655cfa..68d5aae89972 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -128,7 +128,6 @@ struct nfs_inode { * * mtime != read_cache_mtime */ - unsigned long readdir_timestamp; unsigned long read_cache_jiffies; unsigned long attrtimeo; unsigned long attrtimeo_timestamp; -- cgit v1.2.3-59-g8ed1b From 96651ab341cde0fee940ec837f323d711cbfa7d5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Jun 2005 17:16:21 +0000 Subject: [PATCH] RPC: Shrink struct rpc_task by switching to wait_on_bit() Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 1 - net/sunrpc/sched.c | 31 ++++++++++++++++++------------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 99d17ed7cebb..4d77e90d0b30 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -31,7 +31,6 @@ struct rpc_wait_queue; struct rpc_wait { struct list_head list; /* wait queue links */ struct list_head links; /* Links to related tasks */ - wait_queue_head_t 
waitq; /* sync: sleep on this q */ struct rpc_wait_queue * rpc_waitq; /* RPC wait queue we're on */ }; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index cc298fa4b81d..2d9eb7fbd521 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -290,7 +290,7 @@ static void rpc_make_runnable(struct rpc_task *task) return; } } else - wake_up(&task->u.tk_wait.waitq); + wake_up_bit(&task->tk_runstate, RPC_TASK_QUEUED); } /* @@ -578,6 +578,14 @@ static inline int __rpc_do_exit(struct rpc_task *task) return 1; } +static int rpc_wait_bit_interruptible(void *word) +{ + if (signal_pending(current)) + return -ERESTARTSYS; + schedule(); + return 0; +} + /* * This is the RPC `scheduler' (or rather, the finite state machine). */ @@ -648,22 +656,21 @@ static int __rpc_execute(struct rpc_task *task) /* sync task: sleep here */ dprintk("RPC: %4d sync task going to sleep\n", task->tk_pid); - if (RPC_TASK_UNINTERRUPTIBLE(task)) { - __wait_event(task->u.tk_wait.waitq, !RPC_IS_QUEUED(task)); - } else { - __wait_event_interruptible(task->u.tk_wait.waitq, !RPC_IS_QUEUED(task), status); + /* Note: Caller should be using rpc_clnt_sigmask() */ + status = out_of_line_wait_on_bit(&task->tk_runstate, + RPC_TASK_QUEUED, rpc_wait_bit_interruptible, + TASK_INTERRUPTIBLE); + if (status == -ERESTARTSYS) { /* * When a sync task receives a signal, it exits with * -ERESTARTSYS. In order to catch any callbacks that * clean up after sleeping on some queue, we don't * break the loop here, but go around once more. 
*/ - if (status == -ERESTARTSYS) { - dprintk("RPC: %4d got signal\n", task->tk_pid); - task->tk_flags |= RPC_TASK_KILLED; - rpc_exit(task, -ERESTARTSYS); - rpc_wake_up_task(task); - } + dprintk("RPC: %4d got signal\n", task->tk_pid); + task->tk_flags |= RPC_TASK_KILLED; + rpc_exit(task, -ERESTARTSYS); + rpc_wake_up_task(task); } rpc_set_running(task); dprintk("RPC: %4d sync task resuming\n", task->tk_pid); @@ -766,8 +773,6 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, rpc_action call /* Initialize workqueue for async tasks */ task->tk_workqueue = rpciod_workqueue; - if (!RPC_IS_ASYNC(task)) - init_waitqueue_head(&task->u.tk_wait.waitq); if (clnt) { atomic_inc(&clnt->cl_users); -- cgit v1.2.3-59-g8ed1b From 464a98bd70bae8c559cfc82af799faf44824ce64 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Jun 2005 17:16:21 +0000 Subject: [PATCH] NFS: cleanup: shrink struct nfs_open_context Remove the wait queue, and replace the functions that depended on it with wait_on_bit(). 
Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 1 - fs/nfs/pagelist.c | 35 ++++++++++++++++++++++++++++------- include/linux/nfs_fs.h | 1 - 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index c80a81ff59c6..a38d4b22d1f8 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -848,7 +848,6 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rp ctx->state = NULL; ctx->lockowner = current->files; ctx->error = 0; - init_waitqueue_head(&ctx->waitq); } return ctx; } diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 4f1ba723848d..80777f99a58a 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -107,7 +107,7 @@ void nfs_unlock_request(struct nfs_page *req) smp_mb__before_clear_bit(); clear_bit(PG_BUSY, &req->wb_flags); smp_mb__after_clear_bit(); - wake_up_all(&req->wb_context->waitq); + wake_up_bit(&req->wb_flags, PG_BUSY); nfs_release_request(req); } @@ -180,6 +180,17 @@ nfs_list_add_request(struct nfs_page *req, struct list_head *head) req->wb_list_head = head; } +static int nfs_wait_bit_interruptible(void *word) +{ + int ret = 0; + + if (signal_pending(current)) + ret = -ERESTARTSYS; + else + schedule(); + return ret; +} + /** * nfs_wait_on_request - Wait for a request to complete. * @req: request to wait upon. 
@@ -190,12 +201,22 @@ nfs_list_add_request(struct nfs_page *req, struct list_head *head) int nfs_wait_on_request(struct nfs_page *req) { - struct inode *inode = req->wb_context->dentry->d_inode; - struct rpc_clnt *clnt = NFS_CLIENT(inode); - - if (!NFS_WBACK_BUSY(req)) - return 0; - return nfs_wait_event(clnt, req->wb_context->waitq, !NFS_WBACK_BUSY(req)); + struct rpc_clnt *clnt = NFS_CLIENT(req->wb_context->dentry->d_inode); + sigset_t oldmask; + int ret = 0; + + if (!test_bit(PG_BUSY, &req->wb_flags)) + goto out; + /* + * Note: the call to rpc_clnt_sigmask() suffices to ensure that we + * are not interrupted if intr flag is not set + */ + rpc_clnt_sigmask(clnt, &oldmask); + ret = out_of_line_wait_on_bit(&req->wb_flags, PG_BUSY, + nfs_wait_bit_interruptible, TASK_INTERRUPTIBLE); + rpc_clnt_sigunmask(clnt, &oldmask); +out: + return ret; } /** diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 68d5aae89972..0b01b96337f8 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -84,7 +84,6 @@ struct nfs_open_context { int error; struct list_head list; - wait_queue_head_t waitq; }; /* -- cgit v1.2.3-59-g8ed1b From 92cfc62cb8412c9563860b1bf70cd4701f03092e Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 22 Jun 2005 17:16:22 +0000 Subject: [PATCH] NFS: Allow NFS versions to support different sets of inode operations. ACL support will require supporting additional inode operations in v4 (getxattr, setxattr, listxattr). This patch allows different protocol versions to support different inode operations by adding a file_inode_ops to the nfs_rpc_ops (to match the existing dir_inode_ops). Signed-off-by: J. 
Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 2 +- fs/nfs/nfs3proc.c | 1 + fs/nfs/nfs4proc.c | 1 + fs/nfs/proc.c | 1 + include/linux/nfs_xdr.h | 1 + 5 files changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index a38d4b22d1f8..a82f0340744f 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -686,7 +686,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) /* Why so? Because we want revalidate for devices/FIFOs, and * that's precisely what we have in nfs_file_inode_operations. */ - inode->i_op = &nfs_file_inode_operations; + inode->i_op = NFS_SB(sb)->rpc_ops->file_inode_ops; if (S_ISREG(inode->i_mode)) { inode->i_fop = &nfs_file_operations; inode->i_data.a_ops = &nfs_file_aops; diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 3878494dfc2c..53953a775714 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -826,6 +826,7 @@ struct nfs_rpc_ops nfs_v3_clientops = { .version = 3, /* protocol version */ .dentry_ops = &nfs_dentry_operations, .dir_inode_ops = &nfs_dir_inode_operations, + .file_inode_ops = &nfs_file_inode_operations, .getroot = nfs3_proc_get_root, .getattr = nfs3_proc_getattr, .setattr = nfs3_proc_setattr, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a69c02b206c1..a5a8cb3159a0 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2746,6 +2746,7 @@ struct nfs_rpc_ops nfs_v4_clientops = { .version = 4, /* protocol version */ .dentry_ops = &nfs4_dentry_operations, .dir_inode_ops = &nfs4_dir_inode_operations, + .file_inode_ops = &nfs_file_inode_operations, .getroot = nfs4_proc_get_root, .getattr = nfs4_proc_getattr, .setattr = nfs4_proc_setattr, diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index d31b4d6e5a5e..cedf636bcf3c 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -622,6 +622,7 @@ struct nfs_rpc_ops nfs_v2_clientops = { .version = 2, /* protocol version */ .dentry_ops = &nfs_dentry_operations, .dir_inode_ops = &nfs_dir_inode_operations, + 
.file_inode_ops = &nfs_file_inode_operations, .getroot = nfs_proc_get_root, .getattr = nfs_proc_getattr, .setattr = nfs_proc_setattr, diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 47037d9521cb..5b45bafd9db5 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -667,6 +667,7 @@ struct nfs_rpc_ops { int version; /* Protocol version */ struct dentry_operations *dentry_ops; struct inode_operations *dir_inode_ops; + struct inode_operations *file_inode_ops; int (*getroot) (struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); -- cgit v1.2.3-59-g8ed1b From ada70d9425bcc5e376fef8591e4e76e204c0834c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Jun 2005 17:16:22 +0000 Subject: [PATCH] NFS: Add hooks to allow common NFS attribute code to clear cached acls Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 33 ++++++++++++++++++++++++++------- include/linux/nfs_fs.h | 1 + include/linux/nfs_xdr.h | 1 + 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index a82f0340744f..c45bd52cc1d7 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -64,6 +64,7 @@ static void nfs_clear_inode(struct inode *); static void nfs_umount_begin(struct super_block *); static int nfs_statfs(struct super_block *, struct kstatfs *); static int nfs_show_options(struct seq_file *, struct vfsmount *); +static void nfs_zap_acl_cache(struct inode *); static struct rpc_program nfs_program; @@ -153,6 +154,7 @@ nfs_clear_inode(struct inode *inode) nfs_wb_all(inode); BUG_ON (!list_empty(&nfsi->open_files)); + nfs_zap_acl_cache(inode); cred = nfsi->cache_access.cred; if (cred) put_rpccred(cred); @@ -587,9 +589,19 @@ nfs_zap_caches(struct inode *inode) memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) - nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS; + nfsi->flags |= 
NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; else - nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS; + nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; +} + +static void nfs_zap_acl_cache(struct inode *inode) +{ + void (*clear_acl_cache)(struct inode *); + + clear_acl_cache = NFS_PROTO(inode)->clear_acl_cache; + if (clear_acl_cache != NULL) + clear_acl_cache(inode); + NFS_I(inode)->flags &= ~NFS_INO_INVALID_ACL; } /* @@ -789,7 +801,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) } } if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) - NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS; + NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; nfs_end_data_update(inode); unlock_kernel(); return error; @@ -1033,6 +1045,8 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) /* This ensures we revalidate dentries */ nfsi->cache_change_attribute++; } + if (flags & NFS_INO_INVALID_ACL) + nfs_zap_acl_cache(inode); dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); @@ -1183,7 +1197,7 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || inode->i_uid != fattr->uid || inode->i_gid != fattr->gid) - nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS; + nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; /* Has the link count changed? 
*/ if (inode->i_nlink != fattr->nlink) @@ -1292,16 +1306,21 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign #endif nfsi->change_attr = fattr->change_attr; if (!data_unstable) - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS; + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; } - memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + /* If ctime has changed we should definitely clear access+acl caches */ + if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) { + if (!data_unstable) + invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + } memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || inode->i_uid != fattr->uid || inode->i_gid != fattr->gid) - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS; + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; inode->i_mode = fattr->mode; inode->i_nlink = fattr->nlink; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 0b01b96337f8..140bdf489f71 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -189,6 +189,7 @@ struct nfs_inode { #define NFS_INO_INVALID_DATA 0x0010 /* cached data is invalid */ #define NFS_INO_INVALID_ATIME 0x0020 /* cached atime is invalid */ #define NFS_INO_INVALID_ACCESS 0x0040 /* cached access cred invalid */ +#define NFS_INO_INVALID_ACL 0x0080 /* cached acls are invalid */ static inline struct nfs_inode *NFS_I(struct inode *inode) { diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 5b45bafd9db5..cf38db59f347 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -714,6 +714,7 @@ struct nfs_rpc_ops { int (*file_open) (struct inode *, struct file *); int (*file_release) (struct inode *, struct file *); int (*lock)(struct file *, int, struct 
file_lock *); + void (*clear_acl_cache)(struct inode *); }; /* -- cgit v1.2.3-59-g8ed1b From 6b3b5496d7b261d6c9202008dc528e52dbd11e57 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 22 Jun 2005 17:16:22 +0000 Subject: [PATCH] NFSv4: Add {get,set,list}xattr methods for nfs4 Add {get,set,list}xattr methods for nfs4. The new methods are no-ops, to be used by subsequent ACL patch. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 3 +++ fs/nfs/nfs4_fs.h | 7 +++++++ fs/nfs/nfs4proc.c | 39 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 48 insertions(+), 1 deletion(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index dffa21abd3ea..5720537bffdd 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -91,6 +91,9 @@ struct inode_operations nfs4_dir_inode_operations = { .permission = nfs_permission, .getattr = nfs_getattr, .setattr = nfs_setattr, + .getxattr = nfs4_getxattr, + .setxattr = nfs4_setxattr, + .listxattr = nfs4_listxattr, }; #endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 85cf3bd36921..d71f416bd9e5 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -176,6 +176,13 @@ struct nfs4_state_recovery_ops { extern struct dentry_operations nfs4_dentry_operations; extern struct inode_operations nfs4_dir_inode_operations; +extern struct inode_operations nfs4_file_inode_operations; + +/* inode.c */ +extern ssize_t nfs4_getxattr(struct dentry *, const char *, void *, size_t); +extern int nfs4_setxattr(struct dentry *, const char *, const void *, size_t, int); +extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t); + /* nfs4proc.c */ extern int nfs4_map_errors(int err); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a5a8cb3159a0..1b14d17ae9a4 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2732,6 +2732,34 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) return status; } + +int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf, 
+ size_t buflen, int flags) +{ + return -EOPNOTSUPP; +} + +/* The getxattr man page suggests returning -ENODATA for unknown attributes, + * and that's what we'll do for e.g. user attributes that haven't been set. + * But we'll follow ext2/ext3's lead by returning -EOPNOTSUPP for unsupported + * attributes in kernel-managed attribute namespaces. */ +ssize_t nfs4_getxattr(struct dentry *dentry, const char *key, void *buf, + size_t buflen) +{ + return -EOPNOTSUPP; +} + +ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen) +{ + ssize_t len = 0; + + if (buf && buflen < len) + return -ERANGE; + if (buf) + memcpy(buf, "", 0); + return 0; +} + struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops = { .recover_open = nfs4_open_reclaim, .recover_lock = nfs4_lock_reclaim, @@ -2742,11 +2770,20 @@ struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops = { .recover_lock = nfs4_lock_expired, }; +static struct inode_operations nfs4_file_inode_operations = { + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, + .getxattr = nfs4_getxattr, + .setxattr = nfs4_setxattr, + .listxattr = nfs4_listxattr, +}; + struct nfs_rpc_ops nfs_v4_clientops = { .version = 4, /* protocol version */ .dentry_ops = &nfs4_dentry_operations, .dir_inode_ops = &nfs4_dir_inode_operations, - .file_inode_ops = &nfs_file_inode_operations, + .file_inode_ops = &nfs4_file_inode_operations, .getroot = nfs4_proc_get_root, .getattr = nfs4_proc_getattr, .setattr = nfs4_proc_setattr, -- cgit v1.2.3-59-g8ed1b From 96928206961be05f22c3839f0097b610cc485b5d Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 22 Jun 2005 17:16:22 +0000 Subject: [PATCH] NFSv4: fix fattr size calculations Make nfs4 fattr size calculations more explicit, revising them downward a bit in the process. Signed-off-by: J. 
Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index e86406eff0eb..8204926bb467 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -83,12 +83,16 @@ static int nfs_stat_to_errno(int); #define encode_getfh_maxsz (op_encode_hdr_maxsz) #define decode_getfh_maxsz (op_decode_hdr_maxsz + 1 + \ ((3+NFS4_FHSIZE) >> 2)) -#define encode_getattr_maxsz (op_encode_hdr_maxsz + 3) +#define nfs4_fattr_bitmap_maxsz 3 +#define encode_getattr_maxsz (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz) #define nfs4_name_maxsz (1 + ((3 + NFS4_MAXNAMLEN) >> 2)) #define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2)) -#define nfs4_fattr_bitmap_maxsz (36 + 2 * nfs4_name_maxsz) -#define decode_getattr_maxsz (op_decode_hdr_maxsz + 3 + \ - nfs4_fattr_bitmap_maxsz) +/* This is based on getfattr, which uses the most attributes: */ +#define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \ + 3 + 3 + 3 + 2 * nfs4_name_maxsz)) +#define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \ + nfs4_fattr_value_maxsz) +#define decode_getattr_maxsz (op_decode_hdr_maxsz + nfs4_fattr_maxsz) #define encode_savefh_maxsz (op_encode_hdr_maxsz) #define decode_savefh_maxsz (op_decode_hdr_maxsz) #define encode_fsinfo_maxsz (op_encode_hdr_maxsz + 2) @@ -123,11 +127,11 @@ static int nfs_stat_to_errno(int); #define encode_symlink_maxsz (op_encode_hdr_maxsz + \ 1 + nfs4_name_maxsz + \ nfs4_path_maxsz + \ - nfs4_fattr_bitmap_maxsz) + nfs4_fattr_maxsz) #define decode_symlink_maxsz (op_decode_hdr_maxsz + 8) #define encode_create_maxsz (op_encode_hdr_maxsz + \ 2 + nfs4_name_maxsz + \ - nfs4_fattr_bitmap_maxsz) + nfs4_fattr_maxsz) #define decode_create_maxsz (op_decode_hdr_maxsz + 8) #define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4) #define decode_delegreturn_maxsz (op_decode_hdr_maxsz) @@ -206,7 +210,7 @@ static int nfs_stat_to_errno(int); #define 
NFS4_enc_setattr_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ op_encode_hdr_maxsz + 4 + \ - nfs4_fattr_bitmap_maxsz + \ + nfs4_fattr_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_setattr_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ -- cgit v1.2.3-59-g8ed1b From 029d105e66e5a90850d5a09dad76815d0bcfcaa3 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 22 Jun 2005 17:16:22 +0000 Subject: [PATCH] NFSv4: Client-side xdr for reading NFSv4 acls Client-side support for NFSv4 acls: xdr encoding and decoding routines for reading acls Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/nfs4.h | 1 + include/linux/nfs_xdr.h | 7 ++++ 3 files changed, 108 insertions(+) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 8204926bb467..6f1c003ee33a 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -365,6 +365,13 @@ static int nfs_stat_to_errno(int); encode_delegreturn_maxsz) #define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \ decode_delegreturn_maxsz) +#define NFS4_enc_getacl_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_getacl_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + \ + nfs4_fattr_bitmap_maxsz + 1) static struct { unsigned int mode; @@ -1631,6 +1638,34 @@ out: return status; } +/* + * Encode a GETACL request + */ +static int +nfs4_xdr_enc_getacl(struct rpc_rqst *req, uint32_t *p, + struct nfs_getaclargs *args) +{ + struct xdr_stream xdr; + struct rpc_auth *auth = req->rq_task->tk_auth; + struct compound_hdr hdr = { + .nops = 2, + }; + int replen, status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if (status) + goto out; + status = encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0); + /* set up reply buffer: */ + replen = (RPC_REPHDRSIZE + 
auth->au_rslack + NFS4_dec_getacl_sz) << 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, + args->acl_pages, args->acl_pgbase, args->acl_len); +out: + return status; +} + /* * Encode a WRITE request */ @@ -3125,6 +3160,47 @@ static int decode_renew(struct xdr_stream *xdr) return decode_op_hdr(xdr, OP_RENEW); } +static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, + size_t *acl_len) +{ + uint32_t *savep; + uint32_t attrlen, + bitmap[2] = {0}; + struct kvec *iov = req->rq_rcv_buf.head; + int status; + + *acl_len = 0; + if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) + goto out; + if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) + goto out; + if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) + goto out; + + if (unlikely(bitmap[0] & (FATTR4_WORD0_ACL - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_ACL)) { + int hdrlen, recvd; + + /* We ignore &savep and don't do consistency checks on + * the attr length. Let userspace figure it out.... */ + hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base; + recvd = req->rq_rcv_buf.len - hdrlen; + if (attrlen > recvd) { + printk(KERN_WARNING "NFS: server cheating in getattr" + " acl reply: attrlen %u > recvd %u\n", + attrlen, recvd); + return -EINVAL; + } + if (attrlen <= *acl_len) + xdr_read_pages(xdr, attrlen); + *acl_len = attrlen; + } + +out: + return status; +} + static int decode_savefh(struct xdr_stream *xdr) { @@ -3417,6 +3493,29 @@ out: } +/* + * Decode GETACL response + */ +static int +nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, uint32_t *p, size_t *acl_len) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_getacl(&xdr, rqstp, acl_len); + +out: + return status; +} + /* * Decode CLOSE response */ @@ -4017,6 +4116,7 @@ struct rpc_procinfo nfs4_procedures[] = { 
PROC(READDIR, enc_readdir, dec_readdir), PROC(SERVER_CAPS, enc_server_caps, dec_server_caps), PROC(DELEGRETURN, enc_delegreturn, dec_delegreturn), + PROC(GETACL, enc_getacl, dec_getacl), }; struct rpc_version nfs_version4 = { diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 5ca8a8d8ccdf..6ee7e2585af5 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -382,6 +382,7 @@ enum { NFSPROC4_CLNT_READDIR, NFSPROC4_CLNT_SERVER_CAPS, NFSPROC4_CLNT_DELEGRETURN, + NFSPROC4_CLNT_GETACL, }; #endif diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index cf38db59f347..9f5e1d407c7b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -326,6 +326,13 @@ struct nfs_setattrargs { const u32 * bitmask; }; +struct nfs_getaclargs { + struct nfs_fh * fh; + size_t acl_len; + unsigned int acl_pgbase; + struct page ** acl_pages; +}; + struct nfs_setattrres { struct nfs_fattr * fattr; const struct nfs_server * server; -- cgit v1.2.3-59-g8ed1b From aa1870af92d8f6d6db0883696516a83ff2b695a6 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 22 Jun 2005 17:16:22 +0000 Subject: [PATCH] NFSv4: ACL support for the NFSv4 client: read Client-side support for NFSv4 ACLs. Exports the raw xdr code via the system.nfs4_acl extended attribute. It is up to userspace to decode the acl (and to provide correctly xdr'd acls on setxattr), and to convert to/from POSIX ACLs if desired. This patch provides only the read support. Signed-off-by: J. 
Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 65 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1b14d17ae9a4..c91c09938a55 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2162,6 +2162,60 @@ nfs4_proc_file_release(struct inode *inode, struct file *filp) return 0; } +static inline int nfs4_server_supports_acls(struct nfs_server *server) +{ + return (server->caps & NFS_CAP_ACLS) + && (server->acl_bitmask & ACL4_SUPPORT_ALLOW_ACL) + && (server->acl_bitmask & ACL4_SUPPORT_DENY_ACL); +} + +/* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_CACHE_SIZE, and that + * it's OK to put sizeof(void) * (XATTR_SIZE_MAX/PAGE_CACHE_SIZE) bytes on + * the stack. + */ +#define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT) + +static void buf_to_pages(const void *buf, size_t buflen, + struct page **pages, unsigned int *pgbase) +{ + const void *p = buf; + + *pgbase = offset_in_page(buf); + p -= *pgbase; + while (p < buf + buflen) { + *(pages++) = virt_to_page(p); + p += PAGE_CACHE_SIZE; + } +} + +static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct page *pages[NFS4ACL_MAXPAGES]; + struct nfs_getaclargs args = { + .fh = NFS_FH(inode), + .acl_pages = pages, + .acl_len = buflen, + }; + size_t resp_len = buflen; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETACL], + .rpc_argp = &args, + .rpc_resp = &resp_len, + }; + int ret; + + if (!nfs4_server_supports_acls(server)) + return -EOPNOTSUPP; + buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase); + ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + if (buflen && resp_len > buflen) + return -ERANGE; + if (ret == 0) + ret = resp_len; + return ret; +} + static int nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server) { @@ -2733,6 +2787,8 @@ 
nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) } +#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" + int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf, size_t buflen, int flags) { @@ -2746,18 +2802,23 @@ int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf, ssize_t nfs4_getxattr(struct dentry *dentry, const char *key, void *buf, size_t buflen) { - return -EOPNOTSUPP; + struct inode *inode = dentry->d_inode; + + if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0) + return -EOPNOTSUPP; + + return nfs4_proc_get_acl(inode, buf, buflen); } ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen) { - ssize_t len = 0; + size_t len = strlen(XATTR_NAME_NFSV4_ACL) + 1; if (buf && buflen < len) return -ERANGE; if (buf) - memcpy(buf, "", 0); - return 0; + memcpy(buf, XATTR_NAME_NFSV4_ACL, len); + return len; } struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops = { -- cgit v1.2.3-59-g8ed1b From 23ec6965c20db96bc8ea7af0ec178f074dd31c40 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 22 Jun 2005 17:16:22 +0000 Subject: [PATCH] NFSv4: Client-side xdr for writing NFSv4 acls Client-side support for NFSv4 acls: xdr encoding and decoding routines for writing acls Signed-off-by: J. 
Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++++- include/linux/nfs4.h | 1 + include/linux/nfs_xdr.h | 7 +++++ 3 files changed, 78 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 6f1c003ee33a..325cd6d4f23a 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -372,6 +372,13 @@ static int nfs_stat_to_errno(int); decode_putfh_maxsz + \ op_decode_hdr_maxsz + \ nfs4_fattr_bitmap_maxsz + 1) +#define NFS4_enc_setacl_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + op_encode_hdr_maxsz + 4 + \ + nfs4_fattr_bitmap_maxsz + 1) +#define NFS4_dec_setacl_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz) static struct { unsigned int mode; @@ -471,7 +478,7 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s * In the worst-case, this would be * 12(bitmap) + 4(attrlen) + 8(size) + 4(mode) + 4(atime) + 4(mtime) * = 36 bytes, plus any contribution from variable-length fields - * such as owner/group/acl's. + * such as owner/group. 
*/ len = 16; @@ -1095,6 +1102,25 @@ static int encode_renew(struct xdr_stream *xdr, const struct nfs4_client *client return 0; } +static int +encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg) +{ + uint32_t *p; + + RESERVE_SPACE(4+sizeof(zero_stateid.data)); + WRITE32(OP_SETATTR); + WRITEMEM(zero_stateid.data, sizeof(zero_stateid.data)); + RESERVE_SPACE(2*4); + WRITE32(1); + WRITE32(FATTR4_WORD0_ACL); + if (arg->acl_len % 4) + return -EINVAL; + RESERVE_SPACE(4); + WRITE32(arg->acl_len); + xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); + return 0; +} + static int encode_savefh(struct xdr_stream *xdr) { @@ -3492,6 +3518,48 @@ out: } +/* + * Encode an SETACL request + */ +static int +nfs4_xdr_enc_setacl(struct rpc_rqst *req, uint32_t *p, struct nfs_setaclargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if (status) + goto out; + status = encode_setacl(&xdr, args); +out: + return status; +} +/* + * Decode SETACL response + */ +static int +nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, uint32_t *p, void *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_setattr(&xdr, res); +out: + return status; +} /* * Decode GETACL response @@ -4117,6 +4185,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(SERVER_CAPS, enc_server_caps, dec_server_caps), PROC(DELEGRETURN, enc_delegreturn, dec_delegreturn), PROC(GETACL, enc_getacl, dec_getacl), + PROC(SETACL, enc_setacl, dec_setacl), }; struct rpc_version nfs_version4 = { diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 6ee7e2585af5..5bb5b2fd7ba2 100644 --- a/include/linux/nfs4.h +++ 
b/include/linux/nfs4.h @@ -383,6 +383,7 @@ enum { NFSPROC4_CLNT_SERVER_CAPS, NFSPROC4_CLNT_DELEGRETURN, NFSPROC4_CLNT_GETACL, + NFSPROC4_CLNT_SETACL, }; #endif diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9f5e1d407c7b..46b206b460c0 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -326,6 +326,13 @@ struct nfs_setattrargs { const u32 * bitmask; }; +struct nfs_setaclargs { + struct nfs_fh * fh; + size_t acl_len; + unsigned int acl_pgbase; + struct page ** acl_pages; +}; + struct nfs_getaclargs { struct nfs_fh * fh; size_t acl_len; -- cgit v1.2.3-59-g8ed1b From 4b580ee3dc00f9828a9a7aad2724f448fdc94075 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 22 Jun 2005 17:16:23 +0000 Subject: [PATCH] NFSv4: ACL support for the NFSv4 client: write Client-side write support for NFSv4 ACLs. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c91c09938a55..d969dd13e7db 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2216,6 +2216,29 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) return ret; } +static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct page *pages[NFS4ACL_MAXPAGES]; + struct nfs_setaclargs arg = { + .fh = NFS_FH(inode), + .acl_pages = pages, + .acl_len = buflen, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETACL], + .rpc_argp = &arg, + .rpc_resp = NULL, + }; + int ret; + + if (!nfs4_server_supports_acls(server)) + return -EOPNOTSUPP; + buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); + ret = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0); + return ret; +} + static int nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server) { @@ -2792,7 +2815,16 
@@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf, size_t buflen, int flags) { - return -EOPNOTSUPP; + struct inode *inode = dentry->d_inode; + + if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0) + return -EOPNOTSUPP; + + if (!S_ISREG(inode->i_mode) && + (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) + return -EPERM; + + return nfs4_proc_set_acl(inode, buf, buflen); } /* The getxattr man page suggests returning -ENODATA for unknown attributes, -- cgit v1.2.3-59-g8ed1b From e50a1c2e1f816c81eed6a589019052cb44189267 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 22 Jun 2005 17:16:23 +0000 Subject: [PATCH] NFSv4: client-side caching NFSv4 ACLs Add nfs4_acl field to the nfs_inode, and use it to cache acls. Only cache acls of size up to a page. Also prepare for up to a page of acl data even when the user doesn't pass in a buffer, as when they want to get the acl length to decide what size buffer to allocate. Signed-off-by: J. 
Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 7 ++- fs/nfs/nfs4proc.c | 129 +++++++++++++++++++++++++++++++++++++++++++++---- include/linux/nfs_fs.h | 2 +- 3 files changed, 124 insertions(+), 14 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index c45bd52cc1d7..350c48c12639 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -142,10 +142,6 @@ nfs_delete_inode(struct inode * inode) clear_inode(inode); } -/* - * For the moment, the only task for the NFS clear_inode method is to - * release the mmap credential - */ static void nfs_clear_inode(struct inode *inode) { @@ -1923,6 +1919,9 @@ static struct inode *nfs_alloc_inode(struct super_block *sb) if (!nfsi) return NULL; nfsi->flags = 0; +#ifdef CONFIG_NFS_V4 + nfsi->nfs4_acl = NULL; +#endif /* CONFIG_NFS_V4 */ return &nfsi->vfs_inode; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d969dd13e7db..128d01cfea19 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2188,9 +2188,75 @@ static void buf_to_pages(const void *buf, size_t buflen, } } -static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) +struct nfs4_cached_acl { + int cached; + size_t len; + char data[]; +}; + +static void nfs4_set_cached_acl(struct inode *inode, struct nfs4_cached_acl *acl) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + spin_lock(&inode->i_lock); + kfree(nfsi->nfs4_acl); + nfsi->nfs4_acl = acl; + spin_unlock(&inode->i_lock); +} + +static void nfs4_zap_acl_attr(struct inode *inode) +{ + nfs4_set_cached_acl(inode, NULL); +} + +static inline ssize_t nfs4_read_cached_acl(struct inode *inode, char *buf, size_t buflen) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs4_cached_acl *acl; + int ret = -ENOENT; + + spin_lock(&inode->i_lock); + acl = nfsi->nfs4_acl; + if (acl == NULL) + goto out; + if (buf == NULL) /* user is just asking for length */ + goto out_len; + if (acl->cached == 0) + goto out; + ret = -ERANGE; /* see getxattr(2) man page */ + if (acl->len > buflen) + 
goto out; + memcpy(buf, acl->data, acl->len); +out_len: + ret = acl->len; +out: + spin_unlock(&inode->i_lock); + return ret; +} + +static void nfs4_write_cached_acl(struct inode *inode, const char *buf, size_t acl_len) +{ + struct nfs4_cached_acl *acl; + + if (buf && acl_len <= PAGE_SIZE) { + acl = kmalloc(sizeof(*acl) + acl_len, GFP_KERNEL); + if (acl == NULL) + goto out; + acl->cached = 1; + memcpy(acl->data, buf, acl_len); + } else { + acl = kmalloc(sizeof(*acl), GFP_KERNEL); + if (acl == NULL) + goto out; + acl->cached = 0; + } + acl->len = acl_len; +out: + nfs4_set_cached_acl(inode, acl); +} + +static inline ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) { - struct nfs_server *server = NFS_SERVER(inode); struct page *pages[NFS4ACL_MAXPAGES]; struct nfs_getaclargs args = { .fh = NFS_FH(inode), @@ -2198,24 +2264,66 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) .acl_len = buflen, }; size_t resp_len = buflen; + void *resp_buf; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETACL], .rpc_argp = &args, .rpc_resp = &resp_len, }; + struct page *localpage = NULL; int ret; - if (!nfs4_server_supports_acls(server)) - return -EOPNOTSUPP; - buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase); + if (buflen < PAGE_SIZE) { + /* As long as we're doing a round trip to the server anyway, + * let's be prepared for a page of acl data. 
*/ + localpage = alloc_page(GFP_KERNEL); + resp_buf = page_address(localpage); + if (localpage == NULL) + return -ENOMEM; + args.acl_pages[0] = localpage; + args.acl_pgbase = 0; + args.acl_len = PAGE_SIZE; + } else { + resp_buf = buf; + buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase); + } ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); - if (buflen && resp_len > buflen) - return -ERANGE; - if (ret == 0) - ret = resp_len; + if (ret) + goto out_free; + if (resp_len > args.acl_len) + nfs4_write_cached_acl(inode, NULL, resp_len); + else + nfs4_write_cached_acl(inode, resp_buf, resp_len); + if (buf) { + ret = -ERANGE; + if (resp_len > buflen) + goto out_free; + if (localpage) + memcpy(buf, resp_buf, resp_len); + } + ret = resp_len; +out_free: + if (localpage) + __free_page(localpage); return ret; } +static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) +{ + struct nfs_server *server = NFS_SERVER(inode); + int ret; + + if (!nfs4_server_supports_acls(server)) + return -EOPNOTSUPP; + ret = nfs_revalidate_inode(server, inode); + if (ret < 0) + return ret; + ret = nfs4_read_cached_acl(inode, buf, buflen); + if (ret != -ENOENT) + return ret; + return nfs4_get_acl_uncached(inode, buf, buflen); +} + static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen) { struct nfs_server *server = NFS_SERVER(inode); @@ -2236,6 +2344,8 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen return -EOPNOTSUPP; buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); ret = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0); + if (ret == 0) + nfs4_write_cached_acl(inode, buf, buflen); return ret; } @@ -2907,6 +3017,7 @@ struct nfs_rpc_ops nfs_v4_clientops = { .file_open = nfs4_proc_file_open, .file_release = nfs4_proc_file_release, .lock = nfs4_proc_lock, + .clear_acl_cache = nfs4_zap_acl_attr, }; /* diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 140bdf489f71..d2b5d7e0e85a 100644 
--- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -169,13 +169,13 @@ struct nfs_inode { wait_queue_head_t nfs_i_wait; #ifdef CONFIG_NFS_V4 + struct nfs4_cached_acl *nfs4_acl; /* NFSv4 state */ struct list_head open_states; struct nfs_delegation *delegation; int delegation_state; struct rw_semaphore rwsem; #endif /* CONFIG_NFS_V4*/ - struct inode vfs_inode; }; -- cgit v1.2.3-59-g8ed1b From 438b6fdebf2a2e8573e7290bc176feb4d4475f43 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 22 Jun 2005 17:16:23 +0000 Subject: [PATCH] RPC: Don't fall back from krb5p to krb5i We shouldn't be silently falling back from krb5p to krb5i. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index a33b627cbef4..7d88db83ab12 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -675,9 +675,8 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) goto err_free; } gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor); - /* FIXME: Will go away once privacy support is merged in */ - if (gss_auth->service == RPC_GSS_SVC_PRIVACY) - gss_auth->service = RPC_GSS_SVC_INTEGRITY; + if (gss_auth->service == 0) + goto err_put_mech; INIT_LIST_HEAD(&gss_auth->upcalls); spin_lock_init(&gss_auth->lock); auth = &gss_auth->rpc_auth; -- cgit v1.2.3-59-g8ed1b From 6a19275ada9137435da58990c8f8d3f58e170bf1 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 22 Jun 2005 17:16:23 +0000 Subject: [PATCH] RPC: improve rpcauth_create error returns Currently we return -ENOMEM for every single failure to create a new auth. This is actually accurate for auth_null and auth_unix, but for auth_gss it's a bit confusing. Allow rpcauth_create (and the ->create methods) to return errors. With this patch, the user may sometimes see an EINVAL instead. Whee. 
Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 27 +++++++++++++++++---------- fs/nfs/nfs4state.c | 3 ++- net/sunrpc/auth.c | 6 +++--- net/sunrpc/auth_gss/auth_gss.c | 13 +++++++++---- net/sunrpc/clnt.c | 6 ++++-- 5 files changed, 35 insertions(+), 20 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 350c48c12639..97b3fe7ece63 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -160,11 +160,10 @@ nfs_clear_inode(struct inode *inode) void nfs_umount_begin(struct super_block *sb) { - struct nfs_server *server = NFS_SB(sb); - struct rpc_clnt *rpc; + struct rpc_clnt *rpc = NFS_SB(sb)->client; /* -EIO all pending I/O */ - if ((rpc = server->client) != NULL) + if (!IS_ERR(rpc)) rpc_killall_tasks(rpc); } @@ -450,11 +449,14 @@ nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent) return PTR_ERR(server->client); /* RFC 2623, sec 2.3.2 */ if (authflavor != RPC_AUTH_UNIX) { + struct rpc_auth *auth; + server->client_sys = rpc_clone_client(server->client); if (IS_ERR(server->client_sys)) return PTR_ERR(server->client_sys); - if (!rpcauth_create(RPC_AUTH_UNIX, server->client_sys)) - return -ENOMEM; + auth = rpcauth_create(RPC_AUTH_UNIX, server->client_sys); + if (IS_ERR(auth)) + return PTR_ERR(auth); } else { atomic_inc(&server->client->cl_count); server->client_sys = server->client; @@ -1450,6 +1452,7 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type, memset(server, 0, sizeof(struct nfs_server)); /* Zero out the NFS state stuff */ init_nfsv4_state(server); + server->client = server->client_sys = ERR_PTR(-EINVAL); root = &server->fh; if (data->flags & NFS_MOUNT_VER3) @@ -1506,9 +1509,9 @@ static void nfs_kill_super(struct super_block *s) kill_anon_super(s); - if (server->client != NULL && !IS_ERR(server->client)) + if (!IS_ERR(server->client)) rpc_shutdown_client(server->client); - if (server->client_sys != NULL && !IS_ERR(server->client_sys)) + if (!IS_ERR(server->client_sys)) 
rpc_shutdown_client(server->client_sys); if (!(server->flags & NFS_MOUNT_NONLM)) @@ -1650,7 +1653,7 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, } down_write(&clp->cl_sem); - if (clp->cl_rpcclient == NULL) { + if (IS_ERR(clp->cl_rpcclient)) { xprt = xprt_create_proto(proto, &server->addr, &timeparms); if (IS_ERR(xprt)) { up_write(&clp->cl_sem); @@ -1711,9 +1714,12 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, } if (clnt->cl_auth->au_flavor != authflavour) { - if (rpcauth_create(authflavour, clnt) == NULL) { + struct rpc_auth *auth; + + auth = rpcauth_create(authflavour, clnt); + if (IS_ERR(auth)) { dprintk("%s: couldn't create credcache!\n", __FUNCTION__); - return -ENOMEM; + return PTR_ERR(auth); } } @@ -1788,6 +1794,7 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, memset(server, 0, sizeof(struct nfs_server)); /* Zero out the NFS state stuff */ init_nfsv4_state(server); + server->client = server->client_sys = ERR_PTR(-EINVAL); p = nfs_copy_user_string(NULL, &data->hostname, 256); if (IS_ERR(p)) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 17b187f2d776..591ad1d51880 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -110,6 +110,7 @@ nfs4_alloc_client(struct in_addr *addr) INIT_LIST_HEAD(&clp->cl_superblocks); init_waitqueue_head(&clp->cl_waitq); rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS4 client"); + clp->cl_rpcclient = ERR_PTR(-EINVAL); clp->cl_boot_time = CURRENT_TIME; clp->cl_state = 1 << NFS4CLNT_OK; return clp; @@ -131,7 +132,7 @@ nfs4_free_client(struct nfs4_client *clp) if (clp->cl_cred) put_rpccred(clp->cl_cred); nfs_idmap_delete(clp); - if (clp->cl_rpcclient) + if (!IS_ERR(clp->cl_rpcclient)) rpc_shutdown_client(clp->cl_rpcclient); kfree(clp); nfs_callback_down(); diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 9bcec9b927b9..505e2d4b3d62 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -66,10 +66,10 @@ 
rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt) u32 flavor = pseudoflavor_to_flavor(pseudoflavor); if (flavor >= RPC_AUTH_MAXFLAVOR || !(ops = auth_flavors[flavor])) - return NULL; + return ERR_PTR(-EINVAL); auth = ops->create(clnt, pseudoflavor); - if (!auth) - return NULL; + if (IS_ERR(auth)) + return auth; if (clnt->cl_auth) rpcauth_destroy(clnt->cl_auth); clnt->cl_auth = auth; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 7d88db83ab12..2f7b867161d2 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -660,14 +660,16 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) { struct gss_auth *gss_auth; struct rpc_auth * auth; + int err = -ENOMEM; /* XXX? */ dprintk("RPC: creating GSS authenticator for client %p\n",clnt); if (!try_module_get(THIS_MODULE)) - return NULL; + return ERR_PTR(err); if (!(gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL))) goto out_dec; gss_auth->client = clnt; + err = -EINVAL; gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor); if (!gss_auth->mech) { printk(KERN_WARNING "%s: Pseudoflavor %d not found!", @@ -686,15 +688,18 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) auth->au_flavor = flavor; atomic_set(&auth->au_count, 1); - if (rpcauth_init_credcache(auth, GSS_CRED_EXPIRE) < 0) + err = rpcauth_init_credcache(auth, GSS_CRED_EXPIRE); + if (err) goto err_put_mech; snprintf(gss_auth->path, sizeof(gss_auth->path), "%s/%s", clnt->cl_pathname, gss_auth->mech->gm_name); gss_auth->dentry = rpc_mkpipe(gss_auth->path, clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN); - if (IS_ERR(gss_auth->dentry)) + if (IS_ERR(gss_auth->dentry)) { + err = PTR_ERR(gss_auth->dentry); goto err_put_mech; + } return auth; err_put_mech: @@ -703,7 +708,7 @@ err_free: kfree(gss_auth); out_dec: module_put(THIS_MODULE); - return NULL; + return ERR_PTR(err); } static void diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b36797ad8083..9da1deb482e2 100644 
--- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -103,6 +103,7 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname, { struct rpc_version *version; struct rpc_clnt *clnt = NULL; + struct rpc_auth *auth; int err; int len; @@ -157,10 +158,11 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname, if (err < 0) goto out_no_path; - err = -ENOMEM; - if (!rpcauth_create(flavor, clnt)) { + auth = rpcauth_create(flavor, clnt); + if (IS_ERR(auth)) { printk(KERN_INFO "RPC: Couldn't create auth handle (flavor %u)\n", flavor); + err = PTR_ERR(auth); goto out_no_auth; } -- cgit v1.2.3-59-g8ed1b From cdf477068e6db0c3e19df96f46abb85202de138c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 22 Jun 2005 17:16:23 +0000 Subject: [PATCH] RPC: Return -EPFNOSUPPORT for RPC programs that are unavailable Signed-off-by: Andreas Gruenbacher Signed-off-by: Olaf Kirch Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 9da1deb482e2..33f12b84e265 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1021,10 +1021,11 @@ call_verify(struct rpc_task *task) case RPC_AUTH_ERROR: break; case RPC_MISMATCH: - printk(KERN_WARNING "%s: RPC call version mismatch!\n", __FUNCTION__); - goto out_eio; + dprintk("%s: RPC call version mismatch!\n", __FUNCTION__); + error = -EPROTONOSUPPORT; + goto out_err; default: - printk(KERN_WARNING "%s: RPC call rejected, unknown error: %x\n", __FUNCTION__, n); + dprintk("%s: RPC call rejected, unknown error: %x\n", __FUNCTION__, n); goto out_eio; } if (--len < 0) @@ -1075,23 +1076,26 @@ call_verify(struct rpc_task *task) case RPC_SUCCESS: return p; case RPC_PROG_UNAVAIL: - printk(KERN_WARNING "RPC: call_verify: program %u is unsupported by server %s\n", + dprintk("RPC: call_verify: program %u is unsupported by server %s\n", (unsigned int)task->tk_client->cl_prog, task->tk_client->cl_server); - goto out_eio; + 
error = -EPFNOSUPPORT; + goto out_err; case RPC_PROG_MISMATCH: - printk(KERN_WARNING "RPC: call_verify: program %u, version %u unsupported by server %s\n", + dprintk("RPC: call_verify: program %u, version %u unsupported by server %s\n", (unsigned int)task->tk_client->cl_prog, (unsigned int)task->tk_client->cl_vers, task->tk_client->cl_server); - goto out_eio; + error = -EPROTONOSUPPORT; + goto out_err; case RPC_PROC_UNAVAIL: - printk(KERN_WARNING "RPC: call_verify: proc %p unsupported by program %u, version %u on server %s\n", + dprintk("RPC: call_verify: proc %p unsupported by program %u, version %u on server %s\n", task->tk_msg.rpc_proc, task->tk_client->cl_prog, task->tk_client->cl_vers, task->tk_client->cl_server); - goto out_eio; + error = -EOPNOTSUPP; + goto out_err; case RPC_GARBAGE_ARGS: dprintk("RPC: %4d %s: server saw garbage\n", task->tk_pid, __FUNCTION__); break; /* retry */ @@ -1104,7 +1108,7 @@ out_retry: task->tk_client->cl_stats->rpcgarbage++; if (task->tk_garb_retry) { task->tk_garb_retry--; - dprintk(KERN_WARNING "RPC %s: retrying %4d\n", __FUNCTION__, task->tk_pid); + dprintk("RPC %s: retrying %4d\n", __FUNCTION__, task->tk_pid); task->tk_action = call_bind; return NULL; } -- cgit v1.2.3-59-g8ed1b From 007e251f2b2760f738c92adc8c80cbae0bed3ce5 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 22 Jun 2005 17:16:23 +0000 Subject: [PATCH] RPC: Allow multiple RPC client programs to share the same transport Signed-off-by: Andreas Gruenbacher Acked-by: Olaf Kirch Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 2 ++ net/sunrpc/clnt.c | 40 ++++++++++++++++++++++++++++++++++++++++ net/sunrpc/pmap_clnt.c | 3 +++ net/sunrpc/sunrpc_syms.c | 1 + 4 files changed, 46 insertions(+) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index d25e80f77ff5..ab151bbb66df 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -114,6 +114,8 @@ struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, 
char *servname, struct rpc_clnt *rpc_new_client(struct rpc_xprt *xprt, char *servname, struct rpc_program *info, u32 version, rpc_authflavor_t authflavor); +struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, + struct rpc_program *, int); struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); int rpc_shutdown_client(struct rpc_clnt *); int rpc_destroy_client(struct rpc_clnt *); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 33f12b84e265..c979fcf88798 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -241,6 +241,8 @@ rpc_clone_client(struct rpc_clnt *clnt) rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); if (new->cl_auth) atomic_inc(&new->cl_auth->au_count); + new->cl_pmap = &new->cl_pmap_default; + rpc_init_wait_queue(&new->cl_pmap_default.pm_bindwait, "bindwait"); return new; out_no_clnt: printk(KERN_INFO "RPC: out of memory in %s\n", __FUNCTION__); @@ -329,6 +331,44 @@ rpc_release_client(struct rpc_clnt *clnt) rpc_destroy_client(clnt); } +/** + * rpc_bind_new_program - bind a new RPC program to an existing client + * @old - old rpc_client + * @program - rpc program to set + * @vers - rpc program version + * + * Clones the rpc client and sets up a new RPC program. This is mainly + * of use for enabling different RPC programs to share the same transport. + * The Sun NFSv2/v3 ACL protocol can do this. 
+ */ +struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old, + struct rpc_program *program, + int vers) +{ + struct rpc_clnt *clnt; + struct rpc_version *version; + int err; + + BUG_ON(vers >= program->nrvers || !program->version[vers]); + version = program->version[vers]; + clnt = rpc_clone_client(old); + if (IS_ERR(clnt)) + goto out; + clnt->cl_procinfo = version->procs; + clnt->cl_maxproc = version->nrprocs; + clnt->cl_protname = program->name; + clnt->cl_prog = program->number; + clnt->cl_vers = version->number; + clnt->cl_stats = program->stats; + err = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR); + if (err != 0) { + rpc_shutdown_client(clnt); + clnt = ERR_PTR(err); + } +out: + return clnt; +} + /* * Default callback for async RPC calls */ diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index df4d84c9020d..4e81f2766923 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c @@ -53,6 +53,9 @@ rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt) task->tk_pid, clnt->cl_server, map->pm_prog, map->pm_vers, map->pm_prot); + /* Autobind on cloned rpc clients is discouraged */ + BUG_ON(clnt->cl_parent != clnt); + spin_lock(&pmap_lock); if (map->pm_binding) { rpc_sleep_on(&map->pm_bindwait, task, NULL, NULL); diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 1b0ff7e0e869..d8673f66acc3 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -42,6 +42,7 @@ EXPORT_SYMBOL(rpc_release_task); /* RPC client functions */ EXPORT_SYMBOL(rpc_create_client); EXPORT_SYMBOL(rpc_clone_client); +EXPORT_SYMBOL(rpc_bind_new_program); EXPORT_SYMBOL(rpc_destroy_client); EXPORT_SYMBOL(rpc_shutdown_client); EXPORT_SYMBOL(rpc_release_client); -- cgit v1.2.3-59-g8ed1b From e053d1ab62c8ef0eff3dd4c95448cad3c6d2fbf4 Mon Sep 17 00:00:00 2001 From: Olaf Kirch Date: Wed, 22 Jun 2005 17:16:24 +0000 Subject: [PATCH] RPC: Lazy RPC receive buffer allocation Signed-off-by: Olaf Kirch Signed-off-by: Andreas Gruenbacher Signed-off-by: 
Trond Myklebust --- include/linux/sunrpc/xdr.h | 2 +- net/sunrpc/xdr.c | 16 +++++++++++++--- net/sunrpc/xprt.c | 26 ++++++++++++++++++++++---- 3 files changed, 36 insertions(+), 8 deletions(-) diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 541dcf838abf..0f5b7a5a7432 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -160,7 +160,7 @@ typedef struct { typedef size_t (*skb_read_actor_t)(skb_reader_t *desc, void *to, size_t len); -extern void xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int, +extern int xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int, skb_reader_t *, skb_read_actor_t); struct socket; diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index f86d1baa6302..65b268d39782 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -176,7 +176,7 @@ xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset, xdr->buflen += len; } -void +int xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, skb_reader_t *desc, skb_read_actor_t copy_actor) @@ -190,7 +190,7 @@ xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, len -= base; ret = copy_actor(desc, (char *)xdr->head[0].iov_base + base, len); if (ret != len || !desc->count) - return; + return 0; base = 0; } else base -= len; @@ -210,6 +210,14 @@ xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, do { char *kaddr; + /* ACL likes to be lazy in allocating pages - ACLs + * are small by default but can get huge. 
*/ + if (unlikely(*ppage == NULL)) { + *ppage = alloc_page(GFP_ATOMIC); + if (unlikely(*ppage == NULL)) + return -ENOMEM; + } + len = PAGE_CACHE_SIZE; kaddr = kmap_atomic(*ppage, KM_SKB_SUNRPC_DATA); if (base) { @@ -226,13 +234,15 @@ xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, flush_dcache_page(*ppage); kunmap_atomic(kaddr, KM_SKB_SUNRPC_DATA); if (ret != len || !desc->count) - return; + return 0; ppage++; } while ((pglen -= len) != 0); copy_tail: len = xdr->tail[0].iov_len; if (base < len) copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len - base); + + return 0; } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index c74a6bb94074..a180ed4952d6 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -725,7 +725,8 @@ csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) goto no_checksum; desc.csum = csum_partial(skb->data, desc.offset, skb->csum); - xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits); + if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits) < 0) + return -1; if (desc.offset != skb->len) { unsigned int csum2; csum2 = skb_checksum(skb, desc.offset, skb->len - desc.offset, 0); @@ -737,7 +738,8 @@ csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) return -1; return 0; no_checksum: - xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits); + if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits) < 0) + return -1; if (desc.count) return -1; return 0; @@ -907,6 +909,7 @@ tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc) struct rpc_rqst *req; struct xdr_buf *rcvbuf; size_t len; + int r; /* Find and lock the request corresponding to this xid */ spin_lock(&xprt->sock_lock); @@ -927,16 +930,30 @@ tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc) len = xprt->tcp_reclen - xprt->tcp_offset; memcpy(&my_desc, desc, sizeof(my_desc)); my_desc.count = len; - xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, + r = xdr_partial_copy_from_skb(rcvbuf, 
xprt->tcp_copied, &my_desc, tcp_copy_data); desc->count -= len; desc->offset += len; } else - xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, + r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, desc, tcp_copy_data); xprt->tcp_copied += len; xprt->tcp_offset += len; + if (r < 0) { + /* Error when copying to the receive buffer, + * usually because we weren't able to allocate + * additional buffer pages. All we can do now + * is turn off XPRT_COPY_DATA, so the request + * will not receive any additional updates, + * and time out. + * Any remaining data from this record will + * be discarded. + */ + xprt->tcp_flags &= ~XPRT_COPY_DATA; + goto out; + } + if (xprt->tcp_copied == req->rq_private_buf.buflen) xprt->tcp_flags &= ~XPRT_COPY_DATA; else if (xprt->tcp_offset == xprt->tcp_reclen) { @@ -949,6 +966,7 @@ tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc) req->rq_task->tk_pid); xprt_complete_rqst(xprt, req, xprt->tcp_copied); } +out: spin_unlock(&xprt->sock_lock); tcp_check_recm(xprt); } -- cgit v1.2.3-59-g8ed1b From 7e06b53d796a3740307b54aa2799077f8a0c84e7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Jun 2005 17:16:24 +0000 Subject: [PATCH] RPC: fix accounting bug in the case of a truncated RPC message Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 2 +- net/sunrpc/xdr.c | 22 ++++++++++++++-------- net/sunrpc/xprt.c | 35 +++++++++++++++++++++++++++-------- 3 files changed, 42 insertions(+), 17 deletions(-) diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 0f5b7a5a7432..5d1eed2b58a1 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -160,7 +160,7 @@ typedef struct { typedef size_t (*skb_read_actor_t)(skb_reader_t *desc, void *to, size_t len); -extern int xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int, +extern ssize_t xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int, skb_reader_t *, skb_read_actor_t); struct socket; diff --git a/net/sunrpc/xdr.c 
b/net/sunrpc/xdr.c index 65b268d39782..b3ac3f72bf9c 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -176,21 +176,23 @@ xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset, xdr->buflen += len; } -int +ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, skb_reader_t *desc, skb_read_actor_t copy_actor) { struct page **ppage = xdr->pages; unsigned int len, pglen = xdr->page_len; + ssize_t copied = 0; int ret; len = xdr->head[0].iov_len; if (base < len) { len -= base; ret = copy_actor(desc, (char *)xdr->head[0].iov_base + base, len); + copied += ret; if (ret != len || !desc->count) - return 0; + goto out; base = 0; } else base -= len; @@ -214,8 +216,11 @@ xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, * are small by default but can get huge. */ if (unlikely(*ppage == NULL)) { *ppage = alloc_page(GFP_ATOMIC); - if (unlikely(*ppage == NULL)) - return -ENOMEM; + if (unlikely(*ppage == NULL)) { + if (copied == 0) + copied = -ENOMEM; + goto out; + } } len = PAGE_CACHE_SIZE; @@ -233,16 +238,17 @@ xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, } flush_dcache_page(*ppage); kunmap_atomic(kaddr, KM_SKB_SUNRPC_DATA); + copied += ret; if (ret != len || !desc->count) - return 0; + goto out; ppage++; } while ((pglen -= len) != 0); copy_tail: len = xdr->tail[0].iov_len; if (base < len) - copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len - base); - - return 0; + copied += copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len - base); +out: + return copied; } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index a180ed4952d6..ef941e7de8bf 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -823,10 +823,15 @@ tcp_copy_data(skb_reader_t *desc, void *p, size_t len) { if (len > desc->count) len = desc->count; - if (skb_copy_bits(desc->skb, desc->offset, p, len)) + if (skb_copy_bits(desc->skb, desc->offset, p, len)) { + dprintk("RPC: failed to copy %zu bytes from skb. 
%zu bytes remain\n", + len, desc->count); return 0; + } desc->offset += len; desc->count -= len; + dprintk("RPC: copied %zu bytes from skb. %zu bytes remain\n", + len, desc->count); return len; } @@ -865,6 +870,8 @@ tcp_read_fraghdr(struct rpc_xprt *xprt, skb_reader_t *desc) static void tcp_check_recm(struct rpc_xprt *xprt) { + dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u, tcp_flags = %lx\n", + xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen, xprt->tcp_flags); if (xprt->tcp_offset == xprt->tcp_reclen) { xprt->tcp_flags |= XPRT_COPY_RECM; xprt->tcp_offset = 0; @@ -909,7 +916,7 @@ tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc) struct rpc_rqst *req; struct xdr_buf *rcvbuf; size_t len; - int r; + ssize_t r; /* Find and lock the request corresponding to this xid */ spin_lock(&xprt->sock_lock); @@ -932,15 +939,17 @@ tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc) my_desc.count = len; r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, &my_desc, tcp_copy_data); - desc->count -= len; - desc->offset += len; + desc->count -= r; + desc->offset += r; } else r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, desc, tcp_copy_data); - xprt->tcp_copied += len; - xprt->tcp_offset += len; - if (r < 0) { + if (r > 0) { + xprt->tcp_copied += r; + xprt->tcp_offset += r; + } + if (r != len) { /* Error when copying to the receive buffer, * usually because we weren't able to allocate * additional buffer pages. All we can do now @@ -951,9 +960,18 @@ tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc) * be discarded. 
*/ xprt->tcp_flags &= ~XPRT_COPY_DATA; + dprintk("RPC: XID %08x truncated request\n", + ntohl(xprt->tcp_xid)); + dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n", + xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen); goto out; } + dprintk("RPC: XID %08x read %u bytes\n", + ntohl(xprt->tcp_xid), r); + dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n", + xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen); + if (xprt->tcp_copied == req->rq_private_buf.buflen) xprt->tcp_flags &= ~XPRT_COPY_DATA; else if (xprt->tcp_offset == xprt->tcp_reclen) { @@ -961,12 +979,12 @@ tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc) xprt->tcp_flags &= ~XPRT_COPY_DATA; } +out: if (!(xprt->tcp_flags & XPRT_COPY_DATA)) { dprintk("RPC: %4d received reply complete\n", req->rq_task->tk_pid); xprt_complete_rqst(xprt, req, xprt->tcp_copied); } -out: spin_unlock(&xprt->sock_lock); tcp_check_recm(xprt); } @@ -985,6 +1003,7 @@ tcp_read_discard(struct rpc_xprt *xprt, skb_reader_t *desc) desc->count -= len; desc->offset += len; xprt->tcp_offset += len; + dprintk("RPC: discarded %u bytes\n", len); tcp_check_recm(xprt); } -- cgit v1.2.3-59-g8ed1b From bd8100e7eda87507649c6ba4cb32173b34e49986 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 22 Jun 2005 17:16:24 +0000 Subject: [PATCH] RPC: Encode and decode arbitrary XDR arrays Signed-off-by: Andreas Gruenbacher Acked-by: Olaf Kirch Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 19 +++- net/sunrpc/sunrpc_syms.c | 4 + net/sunrpc/xdr.c | 256 ++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 275 insertions(+), 4 deletions(-) diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 5d1eed2b58a1..34ec3e8d99b3 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -146,7 +146,8 @@ extern void xdr_shift_buf(struct xdr_buf *, size_t); extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *); 
extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, int, int); extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, int); -extern int read_bytes_from_xdr_buf(struct xdr_buf *buf, int base, void *obj, int len); +extern int read_bytes_from_xdr_buf(struct xdr_buf *, int, void *, int); +extern int write_bytes_to_xdr_buf(struct xdr_buf *, int, void *, int); /* * Helper structure for copying from an sk_buff. @@ -168,6 +169,22 @@ struct sockaddr; extern int xdr_sendpages(struct socket *, struct sockaddr *, int, struct xdr_buf *, unsigned int, int); +extern int xdr_encode_word(struct xdr_buf *, int, u32); +extern int xdr_decode_word(struct xdr_buf *, int, u32 *); + +struct xdr_array2_desc; +typedef int (*xdr_xcode_elem_t)(struct xdr_array2_desc *desc, void *elem); +struct xdr_array2_desc { + unsigned int elem_size; + unsigned int array_len; + xdr_xcode_elem_t xcode; +}; + +extern int xdr_decode_array2(struct xdr_buf *buf, unsigned int base, + struct xdr_array2_desc *desc); +extern int xdr_encode_array2(struct xdr_buf *buf, unsigned int base, + struct xdr_array2_desc *desc); + /* * Provide some simple tools for XDR buffer overflow-checking etc. 
*/ diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index d8673f66acc3..32e8acbc60fe 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -129,6 +129,10 @@ EXPORT_SYMBOL(xdr_encode_netobj); EXPORT_SYMBOL(xdr_encode_pages); EXPORT_SYMBOL(xdr_inline_pages); EXPORT_SYMBOL(xdr_shift_buf); +EXPORT_SYMBOL(xdr_encode_word); +EXPORT_SYMBOL(xdr_decode_word); +EXPORT_SYMBOL(xdr_encode_array2); +EXPORT_SYMBOL(xdr_decode_array2); EXPORT_SYMBOL(xdr_buf_from_iov); EXPORT_SYMBOL(xdr_buf_subsegment); EXPORT_SYMBOL(xdr_buf_read_netobj); diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index b3ac3f72bf9c..8a4d9c106af1 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -887,8 +887,34 @@ out: return status; } -static int -read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj) +/* obj is assumed to point to allocated memory of size at least len: */ +int +write_bytes_to_xdr_buf(struct xdr_buf *buf, int base, void *obj, int len) +{ + struct xdr_buf subbuf; + int this_len; + int status; + + status = xdr_buf_subsegment(buf, &subbuf, base, len); + if (status) + goto out; + this_len = min(len, (int)subbuf.head[0].iov_len); + memcpy(subbuf.head[0].iov_base, obj, this_len); + len -= this_len; + obj += this_len; + this_len = min(len, (int)subbuf.page_len); + if (this_len) + _copy_to_pages(subbuf.pages, subbuf.page_base, obj, this_len); + len -= this_len; + obj += this_len; + this_len = min(len, (int)subbuf.tail[0].iov_len); + memcpy(subbuf.tail[0].iov_base, obj, this_len); +out: + return status; +} + +int +xdr_decode_word(struct xdr_buf *buf, int base, u32 *obj) { u32 raw; int status; @@ -900,6 +926,14 @@ read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj) return 0; } +int +xdr_encode_word(struct xdr_buf *buf, int base, u32 obj) +{ + u32 raw = htonl(obj); + + return write_bytes_to_xdr_buf(buf, base, &raw, sizeof(obj)); +} + /* If the netobj starting offset bytes from the start of xdr_buf is contained * entirely in the head or the tail, 
set object to point to it; otherwise * try to find space for it at the end of the tail, copy it there, and @@ -910,7 +944,7 @@ xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, int offset) u32 tail_offset = buf->head[0].iov_len + buf->page_len; u32 obj_end_offset; - if (read_u32_from_xdr_buf(buf, offset, &obj->len)) + if (xdr_decode_word(buf, offset, &obj->len)) goto out; obj_end_offset = offset + 4 + obj->len; @@ -943,3 +977,219 @@ xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, int offset) out: return -1; } + +/* Returns 0 on success, or else a negative error code. */ +static int +xdr_xcode_array2(struct xdr_buf *buf, unsigned int base, + struct xdr_array2_desc *desc, int encode) +{ + char *elem = NULL, *c; + unsigned int copied = 0, todo, avail_here; + struct page **ppages = NULL; + int err; + + if (encode) { + if (xdr_encode_word(buf, base, desc->array_len) != 0) + return -EINVAL; + } else { + if (xdr_decode_word(buf, base, &desc->array_len) != 0 || + (unsigned long) base + 4 + desc->array_len * + desc->elem_size > buf->len) + return -EINVAL; + } + base += 4; + + if (!desc->xcode) + return 0; + + todo = desc->array_len * desc->elem_size; + + /* process head */ + if (todo && base < buf->head->iov_len) { + c = buf->head->iov_base + base; + avail_here = min_t(unsigned int, todo, + buf->head->iov_len - base); + todo -= avail_here; + + while (avail_here >= desc->elem_size) { + err = desc->xcode(desc, c); + if (err) + goto out; + c += desc->elem_size; + avail_here -= desc->elem_size; + } + if (avail_here) { + if (!elem) { + elem = kmalloc(desc->elem_size, GFP_KERNEL); + err = -ENOMEM; + if (!elem) + goto out; + } + if (encode) { + err = desc->xcode(desc, elem); + if (err) + goto out; + memcpy(c, elem, avail_here); + } else + memcpy(elem, c, avail_here); + copied = avail_here; + } + base = buf->head->iov_len; /* align to start of pages */ + } + + /* process pages array */ + base -= buf->head->iov_len; + if (todo && base < buf->page_len) 
{ + unsigned int avail_page; + + avail_here = min(todo, buf->page_len - base); + todo -= avail_here; + + base += buf->page_base; + ppages = buf->pages + (base >> PAGE_CACHE_SHIFT); + base &= ~PAGE_CACHE_MASK; + avail_page = min_t(unsigned int, PAGE_CACHE_SIZE - base, + avail_here); + c = kmap(*ppages) + base; + + while (avail_here) { + avail_here -= avail_page; + if (copied || avail_page < desc->elem_size) { + unsigned int l = min(avail_page, + desc->elem_size - copied); + if (!elem) { + elem = kmalloc(desc->elem_size, + GFP_KERNEL); + err = -ENOMEM; + if (!elem) + goto out; + } + if (encode) { + if (!copied) { + err = desc->xcode(desc, elem); + if (err) + goto out; + } + memcpy(c, elem + copied, l); + copied += l; + if (copied == desc->elem_size) + copied = 0; + } else { + memcpy(elem + copied, c, l); + copied += l; + if (copied == desc->elem_size) { + err = desc->xcode(desc, elem); + if (err) + goto out; + copied = 0; + } + } + avail_page -= l; + c += l; + } + while (avail_page >= desc->elem_size) { + err = desc->xcode(desc, c); + if (err) + goto out; + c += desc->elem_size; + avail_page -= desc->elem_size; + } + if (avail_page) { + unsigned int l = min(avail_page, + desc->elem_size - copied); + if (!elem) { + elem = kmalloc(desc->elem_size, + GFP_KERNEL); + err = -ENOMEM; + if (!elem) + goto out; + } + if (encode) { + if (!copied) { + err = desc->xcode(desc, elem); + if (err) + goto out; + } + memcpy(c, elem + copied, l); + copied += l; + if (copied == desc->elem_size) + copied = 0; + } else { + memcpy(elem + copied, c, l); + copied += l; + if (copied == desc->elem_size) { + err = desc->xcode(desc, elem); + if (err) + goto out; + copied = 0; + } + } + } + if (avail_here) { + kunmap(*ppages); + ppages++; + c = kmap(*ppages); + } + + avail_page = min(avail_here, + (unsigned int) PAGE_CACHE_SIZE); + } + base = buf->page_len; /* align to start of tail */ + } + + /* process tail */ + base -= buf->page_len; + if (todo) { + c = buf->tail->iov_base + base; + if (copied) 
{ + unsigned int l = desc->elem_size - copied; + + if (encode) + memcpy(c, elem + copied, l); + else { + memcpy(elem + copied, c, l); + err = desc->xcode(desc, elem); + if (err) + goto out; + } + todo -= l; + c += l; + } + while (todo) { + err = desc->xcode(desc, c); + if (err) + goto out; + c += desc->elem_size; + todo -= desc->elem_size; + } + } + err = 0; + +out: + if (elem) + kfree(elem); + if (ppages) + kunmap(*ppages); + return err; +} + +int +xdr_decode_array2(struct xdr_buf *buf, unsigned int base, + struct xdr_array2_desc *desc) +{ + if (base >= buf->len) + return -EINVAL; + + return xdr_xcode_array2(buf, base, desc, 0); +} + +int +xdr_encode_array2(struct xdr_buf *buf, unsigned int base, + struct xdr_array2_desc *desc) +{ + if ((unsigned long) base + 4 + desc->array_len * desc->elem_size > + buf->head->iov_len + buf->page_len + buf->tail->iov_len) + return -EINVAL; + + return xdr_xcode_array2(buf, base, desc, 1); +} -- cgit v1.2.3-59-g8ed1b From a838cc49d9a7d5652262a6d1b628628cadffa877 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 22 Jun 2005 17:16:24 +0000 Subject: [PATCH] NFSD: Add NFS3ERR_NOTSUPP to the nfsd error mapping table Add the missing NFS3ERR_NOTSUPP error code (defined in NFSv3) to the system-to-protocol-error table in nfsd. The nfsacl extension uses this error code. 
Signed-off-by: Andreas Gruenbacher Signed-off-by: Olaf Kirch Signed-off-by: Andrew Morton Signed-off-by: Trond Myklebust --- fs/nfsd/nfsproc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 757f9d208034..0aa1b9603d7f 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -591,6 +591,7 @@ nfserrno (int errno) { nfserr_dropit, -ENOMEM }, { nfserr_badname, -ESRCH }, { nfserr_io, -ETXTBSY }, + { nfserr_notsupp, -EOPNOTSUPP }, { -1, -EIO } }; int i; -- cgit v1.2.3-59-g8ed1b From 9ba02638e4be28dd4ff724202a640264427c62d1 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 22 Jun 2005 17:16:24 +0000 Subject: [PATCH] RPC: Allow the sunrpc server to multiplex serveral programs on a single port The NFS and NFSACL programs run on the same RPC transport. This patch adds support for this by converting svc_program into a chained list of programs (server-side). Signed-off-by: Andreas Gruenbacher Signed-off-by: Olaf Kirch Signed-off-by: Andrew Morton Signed-off-by: Trond Myklebust --- include/linux/sunrpc/svc.h | 3 ++- net/sunrpc/svc.c | 35 ++++++++++++++++++----------------- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 37003970cf2e..facb94488bb1 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -240,9 +240,10 @@ struct svc_deferred_req { }; /* - * RPC program + * List of RPC programs on the same transport endpoint */ struct svc_program { + struct svc_program * pg_next; /* other programs (same xprt) */ u32 pg_prog; /* program number */ unsigned int pg_lovers; /* lowest version */ unsigned int pg_hivers; /* lowest version */ diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index a02d424a7409..e9bd91265f70 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -35,20 +35,24 @@ svc_create(struct svc_program *prog, unsigned int bufsize) if (!(serv = (struct svc_serv *) kmalloc(sizeof(*serv), GFP_KERNEL))) return NULL; 
memset(serv, 0, sizeof(*serv)); + serv->sv_name = prog->pg_name; serv->sv_program = prog; serv->sv_nrthreads = 1; serv->sv_stats = prog->pg_stats; serv->sv_bufsz = bufsize? bufsize : 4096; - prog->pg_lovers = prog->pg_nvers-1; xdrsize = 0; - for (vers=0; verspg_nvers ; vers++) - if (prog->pg_vers[vers]) { - prog->pg_hivers = vers; - if (prog->pg_lovers > vers) - prog->pg_lovers = vers; - if (prog->pg_vers[vers]->vs_xdrsize > xdrsize) - xdrsize = prog->pg_vers[vers]->vs_xdrsize; - } + while (prog) { + prog->pg_lovers = prog->pg_nvers-1; + for (vers=0; verspg_nvers ; vers++) + if (prog->pg_vers[vers]) { + prog->pg_hivers = vers; + if (prog->pg_lovers > vers) + prog->pg_lovers = vers; + if (prog->pg_vers[vers]->vs_xdrsize > xdrsize) + xdrsize = prog->pg_vers[vers]->vs_xdrsize; + } + prog = prog->pg_next; + } serv->sv_xdrsize = xdrsize; INIT_LIST_HEAD(&serv->sv_threads); INIT_LIST_HEAD(&serv->sv_sockets); @@ -56,8 +60,6 @@ svc_create(struct svc_program *prog, unsigned int bufsize) INIT_LIST_HEAD(&serv->sv_permsocks); spin_lock_init(&serv->sv_lock); - serv->sv_name = prog->pg_name; - /* Remove any stale portmap registrations */ svc_register(serv, 0, 0); @@ -339,7 +341,10 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp) goto sendit; } - if (prog != progp->pg_prog) + for (progp = serv->sv_program; progp; progp = progp->pg_next) + if (prog == progp->pg_prog) + break; + if (progp == NULL) goto err_bad_prog; if (vers >= progp->pg_nvers || @@ -452,11 +457,7 @@ err_bad_auth: goto sendit; err_bad_prog: -#ifdef RPC_PARANOIA - if (prog != 100227 || progp->pg_prog != 100003) - printk("svc: unknown program %d (me %d)\n", prog, progp->pg_prog); - /* else it is just a Solaris client seeing if ACLs are supported */ -#endif + dprintk("svc: unknown program %d\n", prog); serv->sv_stats->rpcbadfmt++; svc_putu32(resv, rpc_prog_unavail); goto sendit; -- cgit v1.2.3-59-g8ed1b From a257cdd0e2179630d3201c32ba14d7fcb3c3a055 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: 
Wed, 22 Jun 2005 17:16:26 +0000 Subject: [PATCH] NFSD: Add server support for NFSv3 ACLs. This adds functions for encoding and decoding POSIX ACLs for the NFSACL protocol extension, and the GETACL and SETACL RPCs. The implementation is compatible with NFSACL in Solaris. Signed-off-by: Andreas Gruenbacher Acked-by: Olaf Kirch Signed-off-by: Andrew Morton Signed-off-by: Trond Myklebust --- fs/Kconfig | 24 ++++ fs/Makefile | 1 + fs/nfs_common/Makefile | 7 + fs/nfs_common/nfsacl.c | 257 ++++++++++++++++++++++++++++++++++ fs/nfsd/Makefile | 2 + fs/nfsd/nfs2acl.c | 336 +++++++++++++++++++++++++++++++++++++++++++++ fs/nfsd/nfs3acl.c | 267 +++++++++++++++++++++++++++++++++++ fs/nfsd/nfs3xdr.c | 13 ++ fs/nfsd/nfssvc.c | 27 ++++ fs/nfsd/nfsxdr.c | 11 ++ fs/nfsd/vfs.c | 107 ++++++++++++++- include/linux/nfsacl.h | 58 ++++++++ include/linux/nfsd/nfsd.h | 16 +++ include/linux/nfsd/xdr.h | 4 + include/linux/nfsd/xdr3.h | 26 ++++ include/linux/sunrpc/svc.h | 11 ++ 16 files changed, 1166 insertions(+), 1 deletion(-) create mode 100644 fs/nfs_common/Makefile create mode 100644 fs/nfs_common/nfsacl.c create mode 100644 fs/nfsd/nfs2acl.c create mode 100644 fs/nfsd/nfs3acl.c create mode 100644 include/linux/nfsacl.h diff --git a/fs/Kconfig b/fs/Kconfig index 178e27494b74..d44b04d9b0a9 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1353,6 +1353,7 @@ config NFSD select LOCKD select SUNRPC select EXPORTFS + select NFS_ACL_SUPPORT if NFSD_V3_ACL || NFSD_V2_ACL help If you want your Linux box to act as an NFS *server*, so that other computers on your local network which support NFS can access certain @@ -1376,6 +1377,10 @@ config NFSD To compile the NFS server support as a module, choose M here: the module will be called nfsd. If unsure, say N. +config NFSD_V2_ACL + bool + depends on NFSD + config NFSD_V3 bool "Provide NFSv3 server support" depends on NFSD @@ -1383,6 +1388,16 @@ config NFSD_V3 If you would like to include the NFSv3 server as well as the NFSv2 server, say Y here. 
If unsure, say Y. +config NFSD_V3_ACL + bool "Provide server support for the NFSv3 ACL protocol extension" + depends on NFSD_V3 + select NFSD_V2_ACL + help + Implement the NFSv3 ACL protocol extension for manipulating POSIX + Access Control Lists on exported file systems. NFS clients should + be compiled with the NFSv3 ACL protocol extension; see the + CONFIG_NFS_V3_ACL option. If unsure, say N. + config NFSD_V4 bool "Provide NFSv4 server support (EXPERIMENTAL)" depends on NFSD_V3 && EXPERIMENTAL @@ -1427,6 +1442,15 @@ config LOCKD_V4 config EXPORTFS tristate +config NFS_ACL_SUPPORT + tristate + select FS_POSIX_ACL + +config NFS_COMMON + bool + depends on NFSD || NFS_FS + default y + config SUNRPC tristate diff --git a/fs/Makefile b/fs/Makefile index 443f2bc56ccf..fc92e59e9faf 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -31,6 +31,7 @@ obj-$(CONFIG_BINFMT_FLAT) += binfmt_flat.o obj-$(CONFIG_FS_MBCACHE) += mbcache.o obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o +obj-$(CONFIG_NFS_COMMON) += nfs_common/ obj-$(CONFIG_QUOTA) += dquot.o obj-$(CONFIG_QFMT_V1) += quota_v1.o diff --git a/fs/nfs_common/Makefile b/fs/nfs_common/Makefile new file mode 100644 index 000000000000..f689ed82af3a --- /dev/null +++ b/fs/nfs_common/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for Linux filesystem routines that are shared by client and server. +# + +obj-$(CONFIG_NFS_ACL_SUPPORT) += nfs_acl.o + +nfs_acl-objs := nfsacl.o diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c new file mode 100644 index 000000000000..18c58c32e326 --- /dev/null +++ b/fs/nfs_common/nfsacl.c @@ -0,0 +1,257 @@ +/* + * fs/nfs_common/nfsacl.c + * + * Copyright (C) 2002-2003 Andreas Gruenbacher + */ + +/* + * The Solaris nfsacl protocol represents some ACLs slightly differently + * than POSIX 1003.1e draft 17 does (and we do): + * + * - Minimal ACLs always have an ACL_MASK entry, so they have + * four instead of three entries. 
+ * - The ACL_MASK entry in such minimal ACLs always has the same + * permissions as the ACL_GROUP_OBJ entry. (In extended ACLs + * the ACL_MASK and ACL_GROUP_OBJ entries may differ.) + * - The identifier fields of the ACL_USER_OBJ and ACL_GROUP_OBJ + * entries contain the identifiers of the owner and owning group. + * (In POSIX ACLs we always set them to ACL_UNDEFINED_ID). + * - ACL entries in the kernel are kept sorted in ascending order + * of (e_tag, e_id). Solaris ACLs are unsorted. + */ + +#include +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); + +EXPORT_SYMBOL(nfsacl_encode); +EXPORT_SYMBOL(nfsacl_decode); + +struct nfsacl_encode_desc { + struct xdr_array2_desc desc; + unsigned int count; + struct posix_acl *acl; + int typeflag; + uid_t uid; + gid_t gid; +}; + +static int +xdr_nfsace_encode(struct xdr_array2_desc *desc, void *elem) +{ + struct nfsacl_encode_desc *nfsacl_desc = + (struct nfsacl_encode_desc *) desc; + u32 *p = (u32 *) elem; + + if (nfsacl_desc->count < nfsacl_desc->acl->a_count) { + struct posix_acl_entry *entry = + &nfsacl_desc->acl->a_entries[nfsacl_desc->count++]; + + *p++ = htonl(entry->e_tag | nfsacl_desc->typeflag); + switch(entry->e_tag) { + case ACL_USER_OBJ: + *p++ = htonl(nfsacl_desc->uid); + break; + case ACL_GROUP_OBJ: + *p++ = htonl(nfsacl_desc->gid); + break; + case ACL_USER: + case ACL_GROUP: + *p++ = htonl(entry->e_id); + break; + default: /* Solaris depends on that! 
*/ + *p++ = 0; + break; + } + *p++ = htonl(entry->e_perm & S_IRWXO); + } else { + const struct posix_acl_entry *pa, *pe; + int group_obj_perm = ACL_READ|ACL_WRITE|ACL_EXECUTE; + + FOREACH_ACL_ENTRY(pa, nfsacl_desc->acl, pe) { + if (pa->e_tag == ACL_GROUP_OBJ) { + group_obj_perm = pa->e_perm & S_IRWXO; + break; + } + } + /* fake up ACL_MASK entry */ + *p++ = htonl(ACL_MASK | nfsacl_desc->typeflag); + *p++ = htonl(0); + *p++ = htonl(group_obj_perm); + } + + return 0; +} + +unsigned int +nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, + struct posix_acl *acl, int encode_entries, int typeflag) +{ + int entries = (acl && acl->a_count) ? max_t(int, acl->a_count, 4) : 0; + struct nfsacl_encode_desc nfsacl_desc = { + .desc = { + .elem_size = 12, + .array_len = encode_entries ? entries : 0, + .xcode = xdr_nfsace_encode, + }, + .acl = acl, + .typeflag = typeflag, + .uid = inode->i_uid, + .gid = inode->i_gid, + }; + int err; + + if (entries > NFS_ACL_MAX_ENTRIES || + xdr_encode_word(buf, base, entries)) + return -EINVAL; + err = xdr_encode_array2(buf, base + 4, &nfsacl_desc.desc); + if (!err) + err = 8 + nfsacl_desc.desc.elem_size * + nfsacl_desc.desc.array_len; + return err; +} + +struct nfsacl_decode_desc { + struct xdr_array2_desc desc; + unsigned int count; + struct posix_acl *acl; +}; + +static int +xdr_nfsace_decode(struct xdr_array2_desc *desc, void *elem) +{ + struct nfsacl_decode_desc *nfsacl_desc = + (struct nfsacl_decode_desc *) desc; + u32 *p = (u32 *) elem; + struct posix_acl_entry *entry; + + if (!nfsacl_desc->acl) { + if (desc->array_len > NFS_ACL_MAX_ENTRIES) + return -EINVAL; + nfsacl_desc->acl = posix_acl_alloc(desc->array_len, GFP_KERNEL); + if (!nfsacl_desc->acl) + return -ENOMEM; + nfsacl_desc->count = 0; + } + + entry = &nfsacl_desc->acl->a_entries[nfsacl_desc->count++]; + entry->e_tag = ntohl(*p++) & ~NFS_ACL_DEFAULT; + entry->e_id = ntohl(*p++); + entry->e_perm = ntohl(*p++); + + switch(entry->e_tag) { + case ACL_USER_OBJ: + 
case ACL_USER: + case ACL_GROUP_OBJ: + case ACL_GROUP: + case ACL_OTHER: + if (entry->e_perm & ~S_IRWXO) + return -EINVAL; + break; + case ACL_MASK: + /* Solaris sometimes sets additonal bits in the mask */ + entry->e_perm &= S_IRWXO; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int +cmp_acl_entry(const void *x, const void *y) +{ + const struct posix_acl_entry *a = x, *b = y; + + if (a->e_tag != b->e_tag) + return a->e_tag - b->e_tag; + else if (a->e_id > b->e_id) + return 1; + else if (a->e_id < b->e_id) + return -1; + else + return 0; +} + +/* + * Convert from a Solaris ACL to a POSIX 1003.1e draft 17 ACL. + */ +static int +posix_acl_from_nfsacl(struct posix_acl *acl) +{ + struct posix_acl_entry *pa, *pe, + *group_obj = NULL, *mask = NULL; + + if (!acl) + return 0; + + sort(acl->a_entries, acl->a_count, sizeof(struct posix_acl_entry), + cmp_acl_entry, NULL); + + /* Clear undefined identifier fields and find the ACL_GROUP_OBJ + and ACL_MASK entries. */ + FOREACH_ACL_ENTRY(pa, acl, pe) { + switch(pa->e_tag) { + case ACL_USER_OBJ: + pa->e_id = ACL_UNDEFINED_ID; + break; + case ACL_GROUP_OBJ: + pa->e_id = ACL_UNDEFINED_ID; + group_obj = pa; + break; + case ACL_MASK: + mask = pa; + /* fall through */ + case ACL_OTHER: + pa->e_id = ACL_UNDEFINED_ID; + break; + } + } + if (acl->a_count == 4 && group_obj && mask && + mask->e_perm == group_obj->e_perm) { + /* remove bogus ACL_MASK entry */ + memmove(mask, mask+1, (3 - (mask - acl->a_entries)) * + sizeof(struct posix_acl_entry)); + acl->a_count = 3; + } + return 0; +} + +unsigned int +nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt, + struct posix_acl **pacl) +{ + struct nfsacl_decode_desc nfsacl_desc = { + .desc = { + .elem_size = 12, + .xcode = pacl ? 
xdr_nfsace_decode : NULL, + }, + }; + u32 entries; + int err; + + if (xdr_decode_word(buf, base, &entries) || + entries > NFS_ACL_MAX_ENTRIES) + return -EINVAL; + err = xdr_decode_array2(buf, base + 4, &nfsacl_desc.desc); + if (err) + return err; + if (pacl) { + if (entries != nfsacl_desc.desc.array_len || + posix_acl_from_nfsacl(nfsacl_desc.acl) != 0) { + posix_acl_release(nfsacl_desc.acl); + return -EINVAL; + } + *pacl = nfsacl_desc.acl; + } + if (aclcnt) + *aclcnt = entries; + return 8 + nfsacl_desc.desc.elem_size * + nfsacl_desc.desc.array_len; +} diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile index b8680a247f8b..9f043f44c92f 100644 --- a/fs/nfsd/Makefile +++ b/fs/nfsd/Makefile @@ -6,7 +6,9 @@ obj-$(CONFIG_NFSD) += nfsd.o nfsd-y := nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o +nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o +nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ nfs4acl.o nfs4callback.o nfsd-objs := $(nfsd-y) diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c new file mode 100644 index 000000000000..7cbf0682b2f0 --- /dev/null +++ b/fs/nfsd/nfs2acl.c @@ -0,0 +1,336 @@ +/* + * linux/fs/nfsd/nfsacl.c + * + * Process version 2 NFSACL requests. + * + * Copyright (C) 2002-2003 Andreas Gruenbacher + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define NFSDDBG_FACILITY NFSDDBG_PROC +#define RETURN_STATUS(st) { resp->status = (st); return (st); } + +/* + * NULL call. + */ +static int +nfsacld_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) +{ + return nfs_ok; +} + +/* + * Get the Access and/or Default ACL of a file. 
+ *
+ * The request's file handle is copied into the reply and verified; a
+ * verification failure is returned to the client unchanged.  Mask bits
+ * outside NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT are rejected with
+ * nfserr_inval.  For each ACL selected by the mask the ACL is fetched with
+ * nfsd_get_posix_acl(); -ENODATA and -EOPNOTSUPP are treated as "no ACL".
+ * A missing access ACL is replaced by one built from the inode mode, a
+ * missing default ACL is reported as NULL.
+ *
+ * The resulting status is stored in resp->status and also returned.
+ */
+static int nfsacld_proc_getacl(struct svc_rqst * rqstp,
+		struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp)
+{
+	svc_fh *fh;
+	struct posix_acl *acl;
+	int nfserr = 0;
+
+	dprintk("nfsd: GETACL(2acl) %s\n", SVCFH_fmt(&argp->fh));
+
+	fh = fh_copy(&resp->fh, &argp->fh);
+	/* Hand back the real fh_verify() status instead of a blanket EINVAL. */
+	nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP);
+	if (nfserr)
+		RETURN_STATUS(nfserr);
+
+	if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
+		RETURN_STATUS(nfserr_inval);
+	resp->mask = argp->mask;
+
+	if (resp->mask & (NFS_ACL|NFS_ACLCNT)) {
+		acl = nfsd_get_posix_acl(fh, ACL_TYPE_ACCESS);
+		if (IS_ERR(acl)) {
+			int err = PTR_ERR(acl);
+
+			if (err == -ENODATA || err == -EOPNOTSUPP)
+				acl = NULL;
+			else {
+				nfserr = nfserrno(err);
+				goto fail;
+			}
+		}
+		if (acl == NULL) {
+			/* Solaris returns the inode's minimum ACL. */
+			struct inode *inode = fh->fh_dentry->d_inode;
+
+			acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
+			/*
+			 * NOTE(review): posix_acl_from_mode() is expected to
+			 * signal allocation failure with an ERR_PTR; never
+			 * store such a value in the reply, it would later be
+			 * handed to posix_acl_release().
+			 */
+			if (IS_ERR(acl)) {
+				nfserr = nfserrno(PTR_ERR(acl));
+				goto fail;
+			}
+		}
+		resp->acl_access = acl;
+	}
+	if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) {
+		/* Check how Solaris handles requests for the Default ACL
+		   of a non-directory! */
+
+		acl = nfsd_get_posix_acl(fh, ACL_TYPE_DEFAULT);
+		if (IS_ERR(acl)) {
+			int err = PTR_ERR(acl);
+
+			if (err == -ENODATA || err == -EOPNOTSUPP)
+				acl = NULL;
+			else {
+				nfserr = nfserrno(err);
+				goto fail;
+			}
+		}
+		resp->acl_default = acl;
+	}
+
+	/* resp->acl_{access,default} are released in
+	   nfsaclsvc_release_getacl. */
+	RETURN_STATUS(0);
+
+fail:
+	posix_acl_release(resp->acl_access);
+	posix_acl_release(resp->acl_default);
+	/*
+	 * nfsaclsvc_release_getacl() puts both pointers as well.  Clear them
+	 * so that a release pass after a failed call (presumably still run
+	 * by the RPC layer -- TODO confirm) cannot drop a reference twice.
+	 */
+	resp->acl_access = NULL;
+	resp->acl_default = NULL;
+	RETURN_STATUS(nfserr);
+}
+
+/*
+ * Set the Access and/or Default ACL of a file.
+ */ +static int nfsacld_proc_setacl(struct svc_rqst * rqstp, + struct nfsd3_setaclargs *argp, + struct nfsd_attrstat *resp) +{ + svc_fh *fh; + int nfserr = 0; + + dprintk("nfsd: SETACL(2acl) %s\n", SVCFH_fmt(&argp->fh)); + + fh = fh_copy(&resp->fh, &argp->fh); + nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP); + + if (!nfserr) { + nfserr = nfserrno( nfsd_set_posix_acl( + fh, ACL_TYPE_ACCESS, argp->acl_access) ); + } + if (!nfserr) { + nfserr = nfserrno( nfsd_set_posix_acl( + fh, ACL_TYPE_DEFAULT, argp->acl_default) ); + } + + /* argp->acl_{access,default} may have been allocated in + nfssvc_decode_setaclargs. */ + posix_acl_release(argp->acl_access); + posix_acl_release(argp->acl_default); + return nfserr; +} + +/* + * Check file attributes + */ +static int nfsacld_proc_getattr(struct svc_rqst * rqstp, + struct nfsd_fhandle *argp, struct nfsd_attrstat *resp) +{ + dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh)); + + fh_copy(&resp->fh, &argp->fh); + return fh_verify(rqstp, &resp->fh, 0, MAY_NOP); +} + +/* + * Check file access + */ +static int nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp, + struct nfsd3_accessres *resp) +{ + int nfserr; + + dprintk("nfsd: ACCESS(2acl) %s 0x%x\n", + SVCFH_fmt(&argp->fh), + argp->access); + + fh_copy(&resp->fh, &argp->fh); + resp->access = argp->access; + nfserr = nfsd_access(rqstp, &resp->fh, &resp->access, NULL); + return nfserr; +} + +/* + * XDR decode functions + */ +static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_getaclargs *argp) +{ + if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + return 0; + argp->mask = ntohl(*p); p++; + + return xdr_argsize_check(rqstp, p); +} + + +static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_setaclargs *argp) +{ + struct kvec *head = rqstp->rq_arg.head; + unsigned int base; + int n; + + if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + return 0; + argp->mask = ntohl(*p++); + if (argp->mask & 
~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) || + !xdr_argsize_check(rqstp, p)) + return 0; + + base = (char *)p - (char *)head->iov_base; + n = nfsacl_decode(&rqstp->rq_arg, base, NULL, + (argp->mask & NFS_ACL) ? + &argp->acl_access : NULL); + if (n > 0) + n = nfsacl_decode(&rqstp->rq_arg, base + n, NULL, + (argp->mask & NFS_DFACL) ? + &argp->acl_default : NULL); + return (n > 0); +} + +static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd_fhandle *argp) +{ + if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + return 0; + return xdr_argsize_check(rqstp, p); +} + +static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_accessargs *argp) +{ + if (!(p = nfs2svc_decode_fh(p, &argp->fh))) + return 0; + argp->access = ntohl(*p++); + + return xdr_argsize_check(rqstp, p); +} + +/* + * XDR encode functions + */ + +/* GETACL */ +static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_getaclres *resp) +{ + struct dentry *dentry = resp->fh.fh_dentry; + struct inode *inode = dentry->d_inode; + int w = nfsacl_size( + (resp->mask & NFS_ACL) ? resp->acl_access : NULL, + (resp->mask & NFS_DFACL) ? 
resp->acl_default : NULL); + struct kvec *head = rqstp->rq_res.head; + unsigned int base; + int n; + + if (dentry == NULL || dentry->d_inode == NULL) + return 0; + inode = dentry->d_inode; + + p = nfs2svc_encode_fattr(rqstp, p, &resp->fh); + *p++ = htonl(resp->mask); + if (!xdr_ressize_check(rqstp, p)) + return 0; + base = (char *)p - (char *)head->iov_base; + + rqstp->rq_res.page_len = w; + while (w > 0) { + if (!svc_take_res_page(rqstp)) + return 0; + w -= PAGE_SIZE; + } + + n = nfsacl_encode(&rqstp->rq_res, base, inode, + resp->acl_access, + resp->mask & NFS_ACL, 0); + if (n > 0) + n = nfsacl_encode(&rqstp->rq_res, base + n, inode, + resp->acl_default, + resp->mask & NFS_DFACL, + NFS_ACL_DEFAULT); + if (n <= 0) + return 0; + return 1; +} + +static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, u32 *p, + struct nfsd_attrstat *resp) +{ + p = nfs2svc_encode_fattr(rqstp, p, &resp->fh); + return xdr_ressize_check(rqstp, p); +} + +/* ACCESS */ +static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_accessres *resp) +{ + p = nfs2svc_encode_fattr(rqstp, p, &resp->fh); + *p++ = htonl(resp->access); + return xdr_ressize_check(rqstp, p); +} + +/* + * XDR release functions + */ +static int nfsaclsvc_release_getacl(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_getaclres *resp) +{ + fh_put(&resp->fh); + posix_acl_release(resp->acl_access); + posix_acl_release(resp->acl_default); + return 1; +} + +static int nfsaclsvc_release_fhandle(struct svc_rqst *rqstp, u32 *p, + struct nfsd_fhandle *resp) +{ + fh_put(&resp->fh); + return 1; +} + +#define nfsaclsvc_decode_voidargs NULL +#define nfsaclsvc_encode_voidres NULL +#define nfsaclsvc_release_void NULL +#define nfsd3_fhandleargs nfsd_fhandle +#define nfsd3_attrstatres nfsd_attrstat +#define nfsd3_voidres nfsd3_voidargs +struct nfsd3_voidargs { int dummy; }; + +#define PROC(name, argt, rest, relt, cache, respsize) \ + { (svc_procfunc) nfsacld_proc_##name, \ + (kxdrproc_t) 
nfsaclsvc_decode_##argt##args, \ + (kxdrproc_t) nfsaclsvc_encode_##rest##res, \ + (kxdrproc_t) nfsaclsvc_release_##relt, \ + sizeof(struct nfsd3_##argt##args), \ + sizeof(struct nfsd3_##rest##res), \ + 0, \ + cache, \ + respsize, \ + } + +#define ST 1 /* status*/ +#define AT 21 /* attributes */ +#define pAT (1+AT) /* post attributes - conditional */ +#define ACL (1+NFS_ACL_MAX_ENTRIES*3) /* Access Control List */ + +static struct svc_procedure nfsd_acl_procedures2[] = { + PROC(null, void, void, void, RC_NOCACHE, ST), + PROC(getacl, getacl, getacl, getacl, RC_NOCACHE, ST+1+2*(1+ACL)), + PROC(setacl, setacl, attrstat, fhandle, RC_NOCACHE, ST+AT), + PROC(getattr, fhandle, attrstat, fhandle, RC_NOCACHE, ST+AT), + PROC(access, access, access, fhandle, RC_NOCACHE, ST+AT+1), +}; + +struct svc_version nfsd_acl_version2 = { + .vs_vers = 2, + .vs_nproc = 5, + .vs_proc = nfsd_acl_procedures2, + .vs_dispatch = nfsd_dispatch, + .vs_xdrsize = NFS3_SVC_XDRSIZE, +}; diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c new file mode 100644 index 000000000000..64ba40572fea --- /dev/null +++ b/fs/nfsd/nfs3acl.c @@ -0,0 +1,267 @@ +/* + * linux/fs/nfsd/nfs3acl.c + * + * Process version 3 NFSACL requests. + * + * Copyright (C) 2002-2003 Andreas Gruenbacher + */ + +#include +#include +#include +#include +#include +#include +#include + +#define RETURN_STATUS(st) { resp->status = (st); return (st); } + +/* + * NULL call. + */ +static int +nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) +{ + return nfs_ok; +} + +/* + * Get the Access and/or Default ACL of a file. 
+ *
+ * The request's file handle is copied into the reply and verified; a
+ * verification failure is returned to the client unchanged.  Mask bits
+ * outside NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT are rejected with
+ * nfserr_inval.  For each ACL selected by the mask the ACL is fetched with
+ * nfsd_get_posix_acl(); -ENODATA and -EOPNOTSUPP are treated as "no ACL".
+ * A missing access ACL is replaced by one built from the inode mode, a
+ * missing default ACL is reported as NULL.
+ *
+ * The resulting status is stored in resp->status and also returned.
+ */
+static int nfsd3_proc_getacl(struct svc_rqst * rqstp,
+		struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp)
+{
+	svc_fh *fh;
+	struct posix_acl *acl;
+	int nfserr = 0;
+
+	fh = fh_copy(&resp->fh, &argp->fh);
+	/* Hand back the real fh_verify() status instead of a blanket EINVAL. */
+	nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP);
+	if (nfserr)
+		RETURN_STATUS(nfserr);
+
+	if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
+		RETURN_STATUS(nfserr_inval);
+	resp->mask = argp->mask;
+
+	if (resp->mask & (NFS_ACL|NFS_ACLCNT)) {
+		acl = nfsd_get_posix_acl(fh, ACL_TYPE_ACCESS);
+		if (IS_ERR(acl)) {
+			int err = PTR_ERR(acl);
+
+			if (err == -ENODATA || err == -EOPNOTSUPP)
+				acl = NULL;
+			else {
+				nfserr = nfserrno(err);
+				goto fail;
+			}
+		}
+		if (acl == NULL) {
+			/* Solaris returns the inode's minimum ACL. */
+			struct inode *inode = fh->fh_dentry->d_inode;
+
+			acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
+			/*
+			 * NOTE(review): posix_acl_from_mode() is expected to
+			 * signal allocation failure with an ERR_PTR; never
+			 * store such a value in the reply, it would later be
+			 * handed to posix_acl_release().
+			 */
+			if (IS_ERR(acl)) {
+				nfserr = nfserrno(PTR_ERR(acl));
+				goto fail;
+			}
+		}
+		resp->acl_access = acl;
+	}
+	if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) {
+		/* Check how Solaris handles requests for the Default ACL
+		   of a non-directory! */
+
+		acl = nfsd_get_posix_acl(fh, ACL_TYPE_DEFAULT);
+		if (IS_ERR(acl)) {
+			int err = PTR_ERR(acl);
+
+			if (err == -ENODATA || err == -EOPNOTSUPP)
+				acl = NULL;
+			else {
+				nfserr = nfserrno(err);
+				goto fail;
+			}
+		}
+		resp->acl_default = acl;
+	}
+
+	/* resp->acl_{access,default} are released in nfs3svc_release_getacl. */
+	RETURN_STATUS(0);
+
+fail:
+	posix_acl_release(resp->acl_access);
+	posix_acl_release(resp->acl_default);
+	/*
+	 * nfs3svc_release_getacl() puts both pointers as well.  Clear them
+	 * so that a release pass after a failed call (presumably still run
+	 * by the RPC layer -- TODO confirm) cannot drop a reference twice.
+	 */
+	resp->acl_access = NULL;
+	resp->acl_default = NULL;
+	RETURN_STATUS(nfserr);
+}
+
+/*
+ * Set the Access and/or Default ACL of a file.
+ */ +static int nfsd3_proc_setacl(struct svc_rqst * rqstp, + struct nfsd3_setaclargs *argp, + struct nfsd3_attrstat *resp) +{ + svc_fh *fh; + int nfserr = 0; + + fh = fh_copy(&resp->fh, &argp->fh); + nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP); + + if (!nfserr) { + nfserr = nfserrno( nfsd_set_posix_acl( + fh, ACL_TYPE_ACCESS, argp->acl_access) ); + } + if (!nfserr) { + nfserr = nfserrno( nfsd_set_posix_acl( + fh, ACL_TYPE_DEFAULT, argp->acl_default) ); + } + + /* argp->acl_{access,default} may have been allocated in + nfs3svc_decode_setaclargs. */ + posix_acl_release(argp->acl_access); + posix_acl_release(argp->acl_default); + RETURN_STATUS(nfserr); +} + +/* + * XDR decode functions + */ +static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_getaclargs *args) +{ + if (!(p = nfs3svc_decode_fh(p, &args->fh))) + return 0; + args->mask = ntohl(*p); p++; + + return xdr_argsize_check(rqstp, p); +} + + +static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_setaclargs *args) +{ + struct kvec *head = rqstp->rq_arg.head; + unsigned int base; + int n; + + if (!(p = nfs3svc_decode_fh(p, &args->fh))) + return 0; + args->mask = ntohl(*p++); + if (args->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) || + !xdr_argsize_check(rqstp, p)) + return 0; + + base = (char *)p - (char *)head->iov_base; + n = nfsacl_decode(&rqstp->rq_arg, base, NULL, + (args->mask & NFS_ACL) ? + &args->acl_access : NULL); + if (n > 0) + n = nfsacl_decode(&rqstp->rq_arg, base + n, NULL, + (args->mask & NFS_DFACL) ? 
+ &args->acl_default : NULL); + return (n > 0); +} + +/* + * XDR encode functions + */ + +/* GETACL */ +static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_getaclres *resp) +{ + struct dentry *dentry = resp->fh.fh_dentry; + + p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh); + if (resp->status == 0 && dentry && dentry->d_inode) { + struct inode *inode = dentry->d_inode; + int w = nfsacl_size( + (resp->mask & NFS_ACL) ? resp->acl_access : NULL, + (resp->mask & NFS_DFACL) ? resp->acl_default : NULL); + struct kvec *head = rqstp->rq_res.head; + unsigned int base; + int n; + + *p++ = htonl(resp->mask); + if (!xdr_ressize_check(rqstp, p)) + return 0; + base = (char *)p - (char *)head->iov_base; + + rqstp->rq_res.page_len = w; + while (w > 0) { + if (!svc_take_res_page(rqstp)) + return 0; + w -= PAGE_SIZE; + } + + n = nfsacl_encode(&rqstp->rq_res, base, inode, + resp->acl_access, + resp->mask & NFS_ACL, 0); + if (n > 0) + n = nfsacl_encode(&rqstp->rq_res, base + n, inode, + resp->acl_default, + resp->mask & NFS_DFACL, + NFS_ACL_DEFAULT); + if (n <= 0) + return 0; + } else + if (!xdr_ressize_check(rqstp, p)) + return 0; + + return 1; +} + +/* SETACL */ +static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_attrstat *resp) +{ + p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh); + + return xdr_ressize_check(rqstp, p); +} + +/* + * XDR release functions + */ +static int nfs3svc_release_getacl(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_getaclres *resp) +{ + fh_put(&resp->fh); + posix_acl_release(resp->acl_access); + posix_acl_release(resp->acl_default); + return 1; +} + +#define nfs3svc_decode_voidargs NULL +#define nfs3svc_release_void NULL +#define nfsd3_setaclres nfsd3_attrstat +#define nfsd3_voidres nfsd3_voidargs +struct nfsd3_voidargs { int dummy; }; + +#define PROC(name, argt, rest, relt, cache, respsize) \ + { (svc_procfunc) nfsd3_proc_##name, \ + (kxdrproc_t) nfs3svc_decode_##argt##args, \ + 
(kxdrproc_t) nfs3svc_encode_##rest##res, \ + (kxdrproc_t) nfs3svc_release_##relt, \ + sizeof(struct nfsd3_##argt##args), \ + sizeof(struct nfsd3_##rest##res), \ + 0, \ + cache, \ + respsize, \ + } + +#define ST 1 /* status*/ +#define AT 21 /* attributes */ +#define pAT (1+AT) /* post attributes - conditional */ +#define ACL (1+NFS_ACL_MAX_ENTRIES*3) /* Access Control List */ + +static struct svc_procedure nfsd_acl_procedures3[] = { + PROC(null, void, void, void, RC_NOCACHE, ST), + PROC(getacl, getacl, getacl, getacl, RC_NOCACHE, ST+1+2*(1+ACL)), + PROC(setacl, setacl, setacl, fhandle, RC_NOCACHE, ST+pAT), +}; + +struct svc_version nfsd_acl_version3 = { + .vs_vers = 3, + .vs_nproc = 3, + .vs_proc = nfsd_acl_procedures3, + .vs_dispatch = nfsd_dispatch, + .vs_xdrsize = NFS3_SVC_XDRSIZE, +}; + diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 11f806835c5a..e0e134d6baba 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -71,6 +71,12 @@ decode_fh(u32 *p, struct svc_fh *fhp) return p + XDR_QUADLEN(size); } +/* Helper function for NFSv3 ACL code */ +u32 *nfs3svc_decode_fh(u32 *p, struct svc_fh *fhp) +{ + return decode_fh(p, fhp); +} + static inline u32 * encode_fh(u32 *p, struct svc_fh *fhp) { @@ -233,6 +239,13 @@ encode_post_op_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) return p; } +/* Helper for NFSv3 ACLs */ +u32 * +nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) +{ + return encode_post_op_attr(rqstp, p, fhp); +} + /* * Enocde weak cache consistency data */ diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 02ded7cfbdcf..79b25b19fec8 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -31,6 +31,7 @@ #include #include #include +#include #define NFSDDBG_FACILITY NFSDDBG_SVC @@ -362,6 +363,31 @@ nfsd_dispatch(struct svc_rqst *rqstp, u32 *statp) return 1; } +#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) +static struct svc_stat nfsd_acl_svcstats; +static struct svc_version * 
nfsd_acl_version[] = { + [2] = &nfsd_acl_version2, + [3] = &nfsd_acl_version3, +}; + +#define NFSD_ACL_NRVERS (sizeof(nfsd_acl_version)/sizeof(nfsd_acl_version[0])) +static struct svc_program nfsd_acl_program = { + .pg_prog = NFS_ACL_PROGRAM, + .pg_nvers = NFSD_ACL_NRVERS, + .pg_vers = nfsd_acl_version, + .pg_name = "nfsd", + .pg_stats = &nfsd_acl_svcstats, +}; + +static struct svc_stat nfsd_acl_svcstats = { + .program = &nfsd_acl_program, +}; + +#define nfsd_acl_program_p &nfsd_acl_program +#else +#define nfsd_acl_program_p NULL +#endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */ + extern struct svc_version nfsd_version2, nfsd_version3, nfsd_version4; static struct svc_version * nfsd_version[] = { @@ -376,6 +402,7 @@ static struct svc_version * nfsd_version[] = { #define NFSD_NRVERS (sizeof(nfsd_version)/sizeof(nfsd_version[0])) struct svc_program nfsd_program = { + .pg_next = nfsd_acl_program_p, .pg_prog = NFS_PROGRAM, /* program number */ .pg_nvers = NFSD_NRVERS, /* nr of entries in nfsd_version */ .pg_vers = nfsd_version, /* version table */ diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 948b08287c99..b45999ff33e6 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -49,6 +49,12 @@ decode_fh(u32 *p, struct svc_fh *fhp) return p + (NFS_FHSIZE >> 2); } +/* Helper function for NFSv2 ACL code */ +u32 *nfs2svc_decode_fh(u32 *p, struct svc_fh *fhp) +{ + return decode_fh(p, fhp); +} + static inline u32 * encode_fh(u32 *p, struct svc_fh *fhp) { @@ -190,6 +196,11 @@ encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) return p; } +/* Helper function for NFSv2 ACL code */ +u32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) +{ + return encode_fattr(rqstp, p, fhp); +} /* * XDR decode functions diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index e3e9d217236e..ae3940dc85cc 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -46,8 +46,9 @@ #include #include #include -#ifdef CONFIG_NFSD_V4 +#include #include 
+#ifdef CONFIG_NFSD_V4 #include #include #include @@ -1857,3 +1858,107 @@ nfsd_racache_init(int cache_size) nfsdstats.ra_size = cache_size; return 0; } + +#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) +struct posix_acl * +nfsd_get_posix_acl(struct svc_fh *fhp, int type) +{ + struct inode *inode = fhp->fh_dentry->d_inode; + char *name; + void *value = NULL; + ssize_t size; + struct posix_acl *acl; + + if (!IS_POSIXACL(inode) || !inode->i_op || !inode->i_op->getxattr) + return ERR_PTR(-EOPNOTSUPP); + switch(type) { + case ACL_TYPE_ACCESS: + name = XATTR_NAME_ACL_ACCESS; + break; + case ACL_TYPE_DEFAULT: + name = XATTR_NAME_ACL_DEFAULT; + break; + default: + return ERR_PTR(-EOPNOTSUPP); + } + + size = inode->i_op->getxattr(fhp->fh_dentry, name, NULL, 0); + + if (size < 0) { + acl = ERR_PTR(size); + goto getout; + } else if (size > 0) { + value = kmalloc(size, GFP_KERNEL); + if (!value) { + acl = ERR_PTR(-ENOMEM); + goto getout; + } + size = inode->i_op->getxattr(fhp->fh_dentry, name, value, size); + if (size < 0) { + acl = ERR_PTR(size); + goto getout; + } + } + acl = posix_acl_from_xattr(value, size); + +getout: + kfree(value); + return acl; +} + +int +nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl) +{ + struct inode *inode = fhp->fh_dentry->d_inode; + char *name; + void *value = NULL; + size_t size; + int error; + + if (!IS_POSIXACL(inode) || !inode->i_op || + !inode->i_op->setxattr || !inode->i_op->removexattr) + return -EOPNOTSUPP; + switch(type) { + case ACL_TYPE_ACCESS: + name = XATTR_NAME_ACL_ACCESS; + break; + case ACL_TYPE_DEFAULT: + name = XATTR_NAME_ACL_DEFAULT; + break; + default: + return -EOPNOTSUPP; + } + + if (acl && acl->a_count) { + size = xattr_acl_size(acl->a_count); + value = kmalloc(size, GFP_KERNEL); + if (!value) + return -ENOMEM; + size = posix_acl_to_xattr(acl, value, size); + if (size < 0) { + error = size; + goto getout; + } + } else + size = 0; + + if (!fhp->fh_locked) + fh_lock(fhp); /* unlocking 
is done automatically */ + if (size) + error = inode->i_op->setxattr(fhp->fh_dentry, name, + value, size, 0); + else { + if (!S_ISDIR(inode->i_mode) && type == ACL_TYPE_DEFAULT) + error = 0; + else { + error = inode->i_op->removexattr(fhp->fh_dentry, name); + if (error == -ENODATA) + error = 0; + } + } + +getout: + kfree(value); + return error; +} +#endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */ diff --git a/include/linux/nfsacl.h b/include/linux/nfsacl.h new file mode 100644 index 000000000000..54487a99beb8 --- /dev/null +++ b/include/linux/nfsacl.h @@ -0,0 +1,58 @@ +/* + * File: linux/nfsacl.h + * + * (C) 2003 Andreas Gruenbacher + */ +#ifndef __LINUX_NFSACL_H +#define __LINUX_NFSACL_H + +#define NFS_ACL_PROGRAM 100227 + +#define ACLPROC2_GETACL 1 +#define ACLPROC2_SETACL 2 +#define ACLPROC2_GETATTR 3 +#define ACLPROC2_ACCESS 4 + +#define ACLPROC3_GETACL 1 +#define ACLPROC3_SETACL 2 + + +/* Flags for the getacl/setacl mode */ +#define NFS_ACL 0x0001 +#define NFS_ACLCNT 0x0002 +#define NFS_DFACL 0x0004 +#define NFS_DFACLCNT 0x0008 + +/* Flag for Default ACL entries */ +#define NFS_ACL_DEFAULT 0x1000 + +#ifdef __KERNEL__ + +#include + +/* Maximum number of ACL entries over NFS */ +#define NFS_ACL_MAX_ENTRIES 1024 + +#define NFSACL_MAXWORDS (2*(2+3*NFS_ACL_MAX_ENTRIES)) +#define NFSACL_MAXPAGES ((2*(8+12*NFS_ACL_MAX_ENTRIES) + PAGE_SIZE-1) \ + >> PAGE_SHIFT) + +static inline unsigned int +nfsacl_size(struct posix_acl *acl_access, struct posix_acl *acl_default) +{ + unsigned int w = 16; + w += max(acl_access ? 
(int)acl_access->a_count : 3, 4) * 12; + if (acl_default) + w += max((int)acl_default->a_count, 4) * 12; + return w; +} + +extern unsigned int +nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, + struct posix_acl *acl, int encode_entries, int typeflag); +extern unsigned int +nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt, + struct posix_acl **pacl); + +#endif /* __KERNEL__ */ +#endif /* __LINUX_NFSACL_H */ diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 8f85d9a59607..4bf931d5ff56 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -124,6 +125,21 @@ int nfsd_statfs(struct svc_rqst *, struct svc_fh *, int nfsd_notify_change(struct inode *, struct iattr *); int nfsd_permission(struct svc_export *, struct dentry *, int); +#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) +#ifdef CONFIG_NFSD_V2_ACL +extern struct svc_version nfsd_acl_version2; +#else +#define nfsd_acl_version2 NULL +#endif +#ifdef CONFIG_NFSD_V3_ACL +extern struct svc_version nfsd_acl_version3; +#else +#define nfsd_acl_version3 NULL +#endif +struct posix_acl *nfsd_get_posix_acl(struct svc_fh *, int); +int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *); +#endif + /* * NFSv4 State diff --git a/include/linux/nfsd/xdr.h b/include/linux/nfsd/xdr.h index ecccef777dae..130d4f588a37 100644 --- a/include/linux/nfsd/xdr.h +++ b/include/linux/nfsd/xdr.h @@ -169,4 +169,8 @@ int nfssvc_encode_entry(struct readdir_cd *, const char *name, int nfssvc_release_fhandle(struct svc_rqst *, u32 *, struct nfsd_fhandle *); +/* Helper functions for NFSv2 ACL code */ +u32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp); +u32 *nfs2svc_decode_fh(u32 *p, struct svc_fh *fhp); + #endif /* LINUX_NFSD_H */ diff --git a/include/linux/nfsd/xdr3.h b/include/linux/nfsd/xdr3.h index 0ae9e0ef5f68..21e18ce7ca63 100644 
--- a/include/linux/nfsd/xdr3.h +++ b/include/linux/nfsd/xdr3.h @@ -110,6 +110,19 @@ struct nfsd3_commitargs { __u32 count; }; +struct nfsd3_getaclargs { + struct svc_fh fh; + int mask; +}; + +struct posix_acl; +struct nfsd3_setaclargs { + struct svc_fh fh; + int mask; + struct posix_acl *acl_access; + struct posix_acl *acl_default; +}; + struct nfsd3_attrstat { __u32 status; struct svc_fh fh; @@ -209,6 +222,14 @@ struct nfsd3_commitres { struct svc_fh fh; }; +struct nfsd3_getaclres { + __u32 status; + struct svc_fh fh; + int mask; + struct posix_acl *acl_access; + struct posix_acl *acl_default; +}; + /* dummy type for release */ struct nfsd3_fhandle_pair { __u32 dummy; @@ -241,6 +262,7 @@ union nfsd3_xdrstore { struct nfsd3_fsinfores fsinfores; struct nfsd3_pathconfres pathconfres; struct nfsd3_commitres commitres; + struct nfsd3_getaclres getaclres; }; #define NFS3_SVC_XDRSIZE sizeof(union nfsd3_xdrstore) @@ -316,6 +338,10 @@ int nfs3svc_encode_entry(struct readdir_cd *, const char *name, int nfs3svc_encode_entry_plus(struct readdir_cd *, const char *name, int namlen, loff_t offset, ino_t ino, unsigned int); +/* Helper functions for NFSv3 ACL code */ +u32 *nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, u32 *p, + struct svc_fh *fhp); +u32 *nfs3svc_decode_fh(u32 *p, struct svc_fh *fhp); #endif /* _LINUX_NFSD_XDR3_H */ diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index facb94488bb1..5af8800e0ce3 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -185,6 +185,17 @@ xdr_ressize_check(struct svc_rqst *rqstp, u32 *p) return vec->iov_len <= PAGE_SIZE; } +static inline struct page * +svc_take_res_page(struct svc_rqst *rqstp) +{ + if (rqstp->rq_arghi <= rqstp->rq_argused) + return NULL; + rqstp->rq_arghi--; + rqstp->rq_respages[rqstp->rq_resused] = + rqstp->rq_argpages[rqstp->rq_arghi]; + return rqstp->rq_respages[rqstp->rq_resused++]; +} + static inline int svc_take_page(struct svc_rqst *rqstp) { if (rqstp->rq_arghi <= 
rqstp->rq_argused) -- cgit v1.2.3-59-g8ed1b From b7fa0554cf1ba6d6895cd0a5b02989a26e0bc704 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 22 Jun 2005 17:16:27 +0000 Subject: [PATCH] NFS: Add support for NFSv3 ACLs This adds acl support for nfs clients via the NFSACL protocol extension, by implementing the getxattr, listxattr, setxattr, and removexattr iops for the system.posix_acl_access and system.posix_acl_default attributes. This patch implements a dumb version that uses no caching (and thus adds some overhead). (Another patch in this patchset adds caching as well.) Signed-off-by: Andreas Gruenbacher Acked-by: Olaf Kirch Signed-off-by: Andrew Morton Signed-off-by: Trond Myklebust --- fs/Kconfig | 11 ++ fs/nfs/Makefile | 1 + fs/nfs/dir.c | 21 ++++ fs/nfs/file.c | 12 ++ fs/nfs/inode.c | 36 +++++- fs/nfs/nfs3acl.c | 303 ++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs3proc.c | 7 +- fs/nfs/nfs3xdr.c | 147 ++++++++++++++++++++++ fs/nfs/nfsroot.c | 9 ++ include/linux/nfs_fs.h | 31 +++++ include/linux/nfs_fs_sb.h | 1 + include/linux/nfs_mount.h | 1 + include/linux/nfs_xdr.h | 27 +++++ 13 files changed, 601 insertions(+), 6 deletions(-) create mode 100644 fs/nfs/nfs3acl.c diff --git a/fs/Kconfig b/fs/Kconfig index d44b04d9b0a9..a7c0cc3203cb 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1268,6 +1268,7 @@ config NFS_FS depends on INET select LOCKD select SUNRPC + select NFS_ACL_SUPPORT if NFS_V3_ACL help If you are connected to some other (usually local) Unix computer (using SLIP, PLIP, PPP or Ethernet) and want to mount files residing @@ -1310,6 +1311,16 @@ config NFS_V3 If unsure, say Y. +config NFS_V3_ACL + bool "Provide client support for the NFSv3 ACL protocol extension" + depends on NFS_V3 + help + Implement the NFSv3 ACL protocol extension for manipulating POSIX + Access Control Lists. The server should also be compiled with + the NFSv3 ACL protocol extension; see the CONFIG_NFSD_V3_ACL option. + + If unsure, say N.
+ config NFS_V4 bool "Provide NFSv4 client support (EXPERIMENTAL)" depends on NFS_FS && EXPERIMENTAL diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index b4baa031edf4..8b3bb715d177 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -8,6 +8,7 @@ nfs-y := dir.o file.o inode.o nfs2xdr.o pagelist.o \ proc.o read.o symlink.o unlink.o write.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o +nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ delegation.o idmap.o \ callback.o callback_xdr.o callback_proc.o diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 5720537bffdd..2c6a95945684 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -75,6 +75,27 @@ struct inode_operations nfs_dir_inode_operations = { .setattr = nfs_setattr, }; +#ifdef CONFIG_NFS_V3 +struct inode_operations nfs3_dir_inode_operations = { + .create = nfs_create, + .lookup = nfs_lookup, + .link = nfs_link, + .unlink = nfs_unlink, + .symlink = nfs_symlink, + .mkdir = nfs_mkdir, + .rmdir = nfs_rmdir, + .mknod = nfs_mknod, + .rename = nfs_rename, + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, + .listxattr = nfs3_listxattr, + .getxattr = nfs3_getxattr, + .setxattr = nfs3_setxattr, + .removexattr = nfs3_removexattr, +}; +#endif /* CONFIG_NFS_V3 */ + #ifdef CONFIG_NFS_V4 static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 55c907592490..a606708264ed 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -71,6 +71,18 @@ struct inode_operations nfs_file_inode_operations = { .setattr = nfs_setattr, }; +#ifdef CONFIG_NFS_V3 +struct inode_operations nfs3_file_inode_operations = { + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, + .listxattr = nfs3_listxattr, + .getxattr = nfs3_getxattr, + .setxattr = nfs3_setxattr, + .removexattr = nfs3_removexattr, +}; +#endif /* 
CONFIG_NFS_v3 */ + /* Hack for future NFS swap support */ #ifndef IS_SWAPFILE # define IS_SWAPFILE(inode) (0) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 97b3fe7ece63..440b9cbb6f81 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -108,6 +108,21 @@ static struct rpc_program nfs_program = { .pipe_dir_name = "/nfs", }; +#ifdef CONFIG_NFS_V3_ACL +static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; +static struct rpc_version * nfsacl_version[] = { + [3] = &nfsacl_version3, +}; + +struct rpc_program nfsacl_program = { + .name = "nfsacl", + .number = NFS_ACL_PROGRAM, + .nrvers = sizeof(nfsacl_version) / sizeof(nfsacl_version[0]), + .version = nfsacl_version, + .stats = &nfsacl_rpcstat, +}; +#endif /* CONFIG_NFS_V3_ACL */ + static inline unsigned long nfs_fattr_to_ino_t(struct nfs_fattr *fattr) { @@ -165,6 +180,9 @@ nfs_umount_begin(struct super_block *sb) /* -EIO all pending I/O */ if (!IS_ERR(rpc)) rpc_killall_tasks(rpc); + rpc = NFS_SB(sb)->client_acl; + if (!IS_ERR(rpc)) + rpc_killall_tasks(rpc); } @@ -461,8 +479,17 @@ nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent) atomic_inc(&server->client->cl_count); server->client_sys = server->client; } - if (server->flags & NFS_MOUNT_VER3) { +#ifdef CONFIG_NFS_V3_ACL + if (!(server->flags & NFS_MOUNT_NOACL)) { + server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3); + /* No errors! 
Assume that Sun nfsacls are supported */ + if (!IS_ERR(server->client_acl)) + server->caps |= NFS_CAP_ACLS; + } +#else + server->flags &= ~NFS_MOUNT_NOACL; +#endif /* CONFIG_NFS_V3_ACL */ if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) server->namelen = NFS3_MAXNAMLEN; sb->s_time_gran = 1; @@ -546,6 +573,7 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) { NFS_MOUNT_NOCTO, ",nocto", "" }, { NFS_MOUNT_NOAC, ",noac", "" }, { NFS_MOUNT_NONLM, ",nolock", ",lock" }, + { NFS_MOUNT_NOACL, ",noacl", "" }, { 0, NULL, NULL } }; struct proc_nfs_info *nfs_infop; @@ -1452,7 +1480,7 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type, memset(server, 0, sizeof(struct nfs_server)); /* Zero out the NFS state stuff */ init_nfsv4_state(server); - server->client = server->client_sys = ERR_PTR(-EINVAL); + server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); root = &server->fh; if (data->flags & NFS_MOUNT_VER3) @@ -1513,6 +1541,8 @@ static void nfs_kill_super(struct super_block *s) rpc_shutdown_client(server->client); if (!IS_ERR(server->client_sys)) rpc_shutdown_client(server->client_sys); + if (!IS_ERR(server->client_acl)) + rpc_shutdown_client(server->client_acl); if (!(server->flags & NFS_MOUNT_NONLM)) lockd_down(); /* release rpc.lockd */ @@ -1794,7 +1824,7 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, memset(server, 0, sizeof(struct nfs_server)); /* Zero out the NFS state stuff */ init_nfsv4_state(server); - server->client = server->client_sys = ERR_PTR(-EINVAL); + server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); p = nfs_copy_user_string(NULL, &data->hostname, 256); if (IS_ERR(p)) diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c new file mode 100644 index 000000000000..393ba79fc14f --- /dev/null +++ b/fs/nfs/nfs3acl.c @@ -0,0 +1,303 @@ +#include +#include +#include +#include +#include +#include + +#define NFSDBG_FACILITY NFSDBG_PROC + 
+ssize_t nfs3_listxattr(struct dentry *dentry, char *buffer, size_t size) +{ + struct inode *inode = dentry->d_inode; + struct posix_acl *acl; + int pos=0, len=0; + +# define output(s) do { \ + if (pos + sizeof(s) <= size) { \ + memcpy(buffer + pos, s, sizeof(s)); \ + pos += sizeof(s); \ + } \ + len += sizeof(s); \ + } while(0) + + acl = nfs3_proc_getacl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + output("system.posix_acl_access"); + posix_acl_release(acl); + } + + if (S_ISDIR(inode->i_mode)) { + acl = nfs3_proc_getacl(inode, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + output("system.posix_acl_default"); + posix_acl_release(acl); + } + } + +# undef output + + if (!buffer || len <= size) + return len; + return -ERANGE; +} + +ssize_t nfs3_getxattr(struct dentry *dentry, const char *name, + void *buffer, size_t size) +{ + struct inode *inode = dentry->d_inode; + struct posix_acl *acl; + int type, error = 0; + + if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0) + type = ACL_TYPE_ACCESS; + else if (strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0) + type = ACL_TYPE_DEFAULT; + else + return -EOPNOTSUPP; + + acl = nfs3_proc_getacl(inode, type); + if (IS_ERR(acl)) + return PTR_ERR(acl); + else if (acl) { + if (type == ACL_TYPE_ACCESS && acl->a_count == 0) + error = -ENODATA; + else + error = posix_acl_to_xattr(acl, buffer, size); + posix_acl_release(acl); + } else + error = -ENODATA; + + return error; +} + +int nfs3_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +{ + struct inode *inode = dentry->d_inode; + struct posix_acl *acl; + int type, error; + + if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0) + type = ACL_TYPE_ACCESS; + else if (strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0) + type = ACL_TYPE_DEFAULT; + else + return -EOPNOTSUPP; + + acl = posix_acl_from_xattr(value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + error = nfs3_proc_setacl(inode, type, acl); + 
posix_acl_release(acl); + + return error; +} + +int nfs3_removexattr(struct dentry *dentry, const char *name) +{ + struct inode *inode = dentry->d_inode; + int type; + + if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0) + type = ACL_TYPE_ACCESS; + else if (strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0) + type = ACL_TYPE_DEFAULT; + else + return -EOPNOTSUPP; + + return nfs3_proc_setacl(inode, type, NULL); +} + +struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_fattr fattr; + struct page *pages[NFSACL_MAXPAGES] = { }; + struct nfs3_getaclargs args = { + .fh = NFS_FH(inode), + /* The xdr layer may allocate pages here. */ + .pages = pages, + }; + struct nfs3_getaclres res = { + .fattr = &fattr, + }; + struct posix_acl *acl = NULL; + int status, count; + + if (!nfs_server_capable(inode, NFS_CAP_ACLS)) + return ERR_PTR(-EOPNOTSUPP); + + switch (type) { + case ACL_TYPE_ACCESS: + args.mask = NFS_ACLCNT|NFS_ACL; + break; + + case ACL_TYPE_DEFAULT: + if (!S_ISDIR(inode->i_mode)) + return NULL; + args.mask = NFS_DFACLCNT|NFS_DFACL; + break; + + default: + return ERR_PTR(-EINVAL); + } + + dprintk("NFS call getacl\n"); + status = rpc_call(server->client_acl, ACLPROC3_GETACL, + &args, &res, 0); + dprintk("NFS reply getacl: %d\n", status); + + /* pages may have been allocated at the xdr layer. 
*/ + for (count = 0; count < NFSACL_MAXPAGES && args.pages[count]; count++) + __free_page(args.pages[count]); + + switch (status) { + case 0: + status = nfs_refresh_inode(inode, &fattr); + break; + case -EPFNOSUPPORT: + case -EPROTONOSUPPORT: + dprintk("NFS_V3_ACL extension not supported; disabling\n"); + server->caps &= ~NFS_CAP_ACLS; + case -ENOTSUPP: + status = -EOPNOTSUPP; + default: + goto getout; + } + if ((args.mask & res.mask) != args.mask) { + status = -EIO; + goto getout; + } + + if (res.acl_access != NULL) { + if (posix_acl_equiv_mode(res.acl_access, NULL) == 0) { + posix_acl_release(res.acl_access); + res.acl_access = NULL; + } + } + + switch(type) { + case ACL_TYPE_ACCESS: + acl = res.acl_access; + res.acl_access = NULL; + break; + + case ACL_TYPE_DEFAULT: + acl = res.acl_default; + res.acl_default = NULL; + } + +getout: + posix_acl_release(res.acl_access); + posix_acl_release(res.acl_default); + + if (status != 0) { + posix_acl_release(acl); + acl = ERR_PTR(status); + } + return acl; +} + +static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, + struct posix_acl *dfacl) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_fattr fattr; + struct page *pages[NFSACL_MAXPAGES] = { }; + struct nfs3_setaclargs args = { + .inode = inode, + .mask = NFS_ACL, + .acl_access = acl, + .pages = pages, + }; + int status, count; + + status = -EOPNOTSUPP; + if (!nfs_server_capable(inode, NFS_CAP_ACLS)) + goto out; + + /* We are doing this here, because XDR marshalling can only + return -ENOMEM. 
*/ + status = -ENOSPC; + if (acl != NULL && acl->a_count > NFS_ACL_MAX_ENTRIES) + goto out; + if (dfacl != NULL && dfacl->a_count > NFS_ACL_MAX_ENTRIES) + goto out; + if (S_ISDIR(inode->i_mode)) { + args.mask |= NFS_DFACL; + args.acl_default = dfacl; + } + + dprintk("NFS call setacl\n"); + nfs_begin_data_update(inode); + status = rpc_call(server->client_acl, ACLPROC3_SETACL, + &args, &fattr, 0); + NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS; + nfs_end_data_update(inode); + dprintk("NFS reply setacl: %d\n", status); + + /* pages may have been allocated at the xdr layer. */ + for (count = 0; count < NFSACL_MAXPAGES && args.pages[count]; count++) + __free_page(args.pages[count]); + + switch (status) { + case 0: + status = nfs_refresh_inode(inode, &fattr); + break; + case -EPFNOSUPPORT: + case -EPROTONOSUPPORT: + dprintk("NFS_V3_ACL SETACL RPC not supported" + "(will not retry)\n"); + server->caps &= ~NFS_CAP_ACLS; + case -ENOTSUPP: + status = -EOPNOTSUPP; + } +out: + return status; +} + +int nfs3_proc_setacl(struct inode *inode, int type, struct posix_acl *acl) +{ + struct posix_acl *alloc = NULL, *dfacl = NULL; + int status; + + if (S_ISDIR(inode->i_mode)) { + switch(type) { + case ACL_TYPE_ACCESS: + alloc = dfacl = nfs3_proc_getacl(inode, + ACL_TYPE_DEFAULT); + if (IS_ERR(alloc)) + goto fail; + break; + + case ACL_TYPE_DEFAULT: + dfacl = acl; + alloc = acl = nfs3_proc_getacl(inode, + ACL_TYPE_ACCESS); + if (IS_ERR(alloc)) + goto fail; + break; + + default: + return -EINVAL; + } + } else if (type != ACL_TYPE_ACCESS) + return -EINVAL; + + if (acl == NULL) { + alloc = acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); + if (IS_ERR(alloc)) + goto fail; + } + status = nfs3_proc_setacls(inode, acl, dfacl); + posix_acl_release(alloc); + return status; + +fail: + return PTR_ERR(alloc); +} diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 53953a775714..d03bac0cc42f 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -17,6 +17,7 @@ #include #include #include 
+#include #define NFSDBG_FACILITY NFSDBG_PROC @@ -45,7 +46,7 @@ static inline int nfs3_rpc_call_wrapper(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags) { struct rpc_message msg = { - .rpc_proc = &nfs3_procedures[proc], + .rpc_proc = &clnt->cl_procinfo[proc], .rpc_argp = argp, .rpc_resp = resp, }; @@ -825,8 +826,8 @@ nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl) struct nfs_rpc_ops nfs_v3_clientops = { .version = 3, /* protocol version */ .dentry_ops = &nfs_dentry_operations, - .dir_inode_ops = &nfs_dir_inode_operations, - .file_inode_ops = &nfs_file_inode_operations, + .dir_inode_ops = &nfs3_dir_inode_operations, + .file_inode_ops = &nfs3_file_inode_operations, .getroot = nfs3_proc_get_root, .getattr = nfs3_proc_getattr, .setattr = nfs3_proc_setattr, diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index a3593d47e5ab..a4437fb177f0 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -21,6 +21,7 @@ #include #include #include +#include #define NFSDBG_FACILITY NFSDBG_XDR @@ -79,6 +80,11 @@ extern int nfs_stat_to_errno(int); #define NFS3_pathconfres_sz (1+NFS3_post_op_attr_sz+6) #define NFS3_commitres_sz (1+NFS3_wcc_data_sz+2) +#define ACL3_getaclargs_sz (NFS3_fh_sz+1) +#define ACL3_setaclargs_sz (NFS3_fh_sz+1+2*(2+5*3)) +#define ACL3_getaclres_sz (1+NFS3_post_op_attr_sz+1+2*(2+5*3)) +#define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz) + /* * Map file type to S_IFMT bits */ @@ -627,6 +633,74 @@ nfs3_xdr_commitargs(struct rpc_rqst *req, u32 *p, struct nfs_writeargs *args) return 0; } +#ifdef CONFIG_NFS_V3_ACL +/* + * Encode GETACL arguments + */ +static int +nfs3_xdr_getaclargs(struct rpc_rqst *req, u32 *p, + struct nfs3_getaclargs *args) +{ + struct rpc_auth *auth = req->rq_task->tk_auth; + unsigned int replen; + + p = xdr_encode_fhandle(p, args->fh); + *p++ = htonl(args->mask); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + + if (args->mask & (NFS_ACL | NFS_DFACL)) { + /* Inline the page array */ + replen = 
(RPC_REPHDRSIZE + auth->au_rslack + + ACL3_getaclres_sz) << 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, + NFSACL_MAXPAGES << PAGE_SHIFT); + } + return 0; +} + +/* + * Encode SETACL arguments + */ +static int +nfs3_xdr_setaclargs(struct rpc_rqst *req, u32 *p, + struct nfs3_setaclargs *args) +{ + struct xdr_buf *buf = &req->rq_snd_buf; + unsigned int base, len_in_head, len = nfsacl_size( + (args->mask & NFS_ACL) ? args->acl_access : NULL, + (args->mask & NFS_DFACL) ? args->acl_default : NULL); + int count, err; + + p = xdr_encode_fhandle(p, NFS_FH(args->inode)); + *p++ = htonl(args->mask); + base = (char *)p - (char *)buf->head->iov_base; + /* put as much of the acls into head as possible. */ + len_in_head = min_t(unsigned int, buf->head->iov_len - base, len); + len -= len_in_head; + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p + len_in_head); + + for (count = 0; (count << PAGE_SHIFT) < len; count++) { + args->pages[count] = alloc_page(GFP_KERNEL); + if (!args->pages[count]) { + while (count) + __free_page(args->pages[--count]); + return -ENOMEM; + } + } + xdr_encode_pages(buf, args->pages, 0, len); + + err = nfsacl_encode(buf, base, args->inode, + (args->mask & NFS_ACL) ? + args->acl_access : NULL, 1, 0); + if (err > 0) + err = nfsacl_encode(buf, base + err, args->inode, + (args->mask & NFS_DFACL) ? + args->acl_default : NULL, 1, + NFS_ACL_DEFAULT); + return (err > 0) ? 
0 : err; +} +#endif /* CONFIG_NFS_V3_ACL */ + /* * NFS XDR decode functions */ @@ -978,6 +1052,54 @@ nfs3_xdr_commitres(struct rpc_rqst *req, u32 *p, struct nfs_writeres *res) return 0; } +#ifdef CONFIG_NFS_V3_ACL +/* + * Decode GETACL reply + */ +static int +nfs3_xdr_getaclres(struct rpc_rqst *req, u32 *p, + struct nfs3_getaclres *res) +{ + struct xdr_buf *buf = &req->rq_rcv_buf; + int status = ntohl(*p++); + struct posix_acl **acl; + unsigned int *aclcnt; + int err, base; + + if (status != 0) + return -nfs_stat_to_errno(status); + p = xdr_decode_post_op_attr(p, res->fattr); + res->mask = ntohl(*p++); + if (res->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) + return -EINVAL; + base = (char *)p - (char *)req->rq_rcv_buf.head->iov_base; + + acl = (res->mask & NFS_ACL) ? &res->acl_access : NULL; + aclcnt = (res->mask & NFS_ACLCNT) ? &res->acl_access_count : NULL; + err = nfsacl_decode(buf, base, aclcnt, acl); + + acl = (res->mask & NFS_DFACL) ? &res->acl_default : NULL; + aclcnt = (res->mask & NFS_DFACLCNT) ? &res->acl_default_count : NULL; + if (err > 0) + err = nfsacl_decode(buf, base + err, aclcnt, acl); + return (err > 0) ? 0 : err; +} + +/* + * Decode setacl reply. + */ +static int +nfs3_xdr_setaclres(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr) +{ + int status = ntohl(*p++); + + if (status) + return -nfs_stat_to_errno(status); + xdr_decode_post_op_attr(p, fattr); + return 0; +} +#endif /* CONFIG_NFS_V3_ACL */ + #ifndef MAX # define MAX(a, b) (((a) > (b))? 
(a) : (b)) #endif @@ -1021,3 +1143,28 @@ struct rpc_version nfs_version3 = { .procs = nfs3_procedures }; +#ifdef CONFIG_NFS_V3_ACL +static struct rpc_procinfo nfs3_acl_procedures[] = { + [ACLPROC3_GETACL] = { + .p_proc = ACLPROC3_GETACL, + .p_encode = (kxdrproc_t) nfs3_xdr_getaclargs, + .p_decode = (kxdrproc_t) nfs3_xdr_getaclres, + .p_bufsiz = MAX(ACL3_getaclargs_sz, ACL3_getaclres_sz) << 2, + .p_timer = 1, + }, + [ACLPROC3_SETACL] = { + .p_proc = ACLPROC3_SETACL, + .p_encode = (kxdrproc_t) nfs3_xdr_setaclargs, + .p_decode = (kxdrproc_t) nfs3_xdr_setaclres, + .p_bufsiz = MAX(ACL3_setaclargs_sz, ACL3_setaclres_sz) << 2, + .p_timer = 0, + }, +}; + +struct rpc_version nfsacl_version3 = { + .number = 3, + .nrprocs = sizeof(nfs3_acl_procedures)/ + sizeof(nfs3_acl_procedures[0]), + .procs = nfs3_acl_procedures, +}; +#endif /* CONFIG_NFS_V3_ACL */ diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index fd5bc596fe8a..1b272a135a31 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -124,6 +124,7 @@ enum { Opt_soft, Opt_hard, Opt_intr, Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac, Opt_noac, Opt_lock, Opt_nolock, Opt_v2, Opt_v3, Opt_udp, Opt_tcp, + Opt_acl, Opt_noacl, /* Error token */ Opt_err }; @@ -158,6 +159,8 @@ static match_table_t __initdata tokens = { {Opt_udp, "udp"}, {Opt_tcp, "proto=tcp"}, {Opt_tcp, "tcp"}, + {Opt_acl, "acl"}, + {Opt_noacl, "noacl"}, {Opt_err, NULL} }; @@ -266,6 +269,12 @@ static int __init root_nfs_parse(char *name, char *buf) case Opt_tcp: nfs_data.flags |= NFS_MOUNT_TCP; break; + case Opt_acl: + nfs_data.flags &= ~NFS_MOUNT_NOACL; + break; + case Opt_noacl: + nfs_data.flags |= NFS_MOUNT_NOACL; + break; default : return 0; } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index d2b5d7e0e85a..3a5e442ac776 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -301,6 +301,9 @@ extern u32 root_nfs_parse_addr(char *name); /*__init*/ * linux/fs/nfs/file.c */ extern struct inode_operations 
nfs_file_inode_operations; +#ifdef CONFIG_NFS_V3 +extern struct inode_operations nfs3_file_inode_operations; +#endif /* CONFIG_NFS_V3 */ extern struct file_operations nfs_file_operations; extern struct address_space_operations nfs_file_aops; @@ -315,6 +318,22 @@ static inline struct rpc_cred *nfs_file_cred(struct file *file) return NULL; } +/* + * linux/fs/nfs/xattr.c + */ +#ifdef CONFIG_NFS_V3_ACL +extern ssize_t nfs3_listxattr(struct dentry *, char *, size_t); +extern ssize_t nfs3_getxattr(struct dentry *, const char *, void *, size_t); +extern int nfs3_setxattr(struct dentry *, const char *, + const void *, size_t, int); +extern int nfs3_removexattr (struct dentry *, const char *name); +#else +# define nfs3_listxattr NULL +# define nfs3_getxattr NULL +# define nfs3_setxattr NULL +# define nfs3_removexattr NULL +#endif + /* * linux/fs/nfs/direct.c */ @@ -329,6 +348,9 @@ extern ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, * linux/fs/nfs/dir.c */ extern struct inode_operations nfs_dir_inode_operations; +#ifdef CONFIG_NFS_V3 +extern struct inode_operations nfs3_dir_inode_operations; +#endif /* CONFIG_NFS_V3 */ extern struct file_operations nfs_dir_operations; extern struct dentry_operations nfs_dentry_operations; @@ -449,6 +471,15 @@ static inline void nfs_readdata_free(struct nfs_read_data *p) extern void nfs_readdata_release(struct rpc_task *task); +/* + * linux/fs/nfs3proc.c + */ +#ifdef CONFIG_NFS_V3_ACL +extern struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type); +extern int nfs3_proc_setacl(struct inode *inode, int type, + struct posix_acl *acl); +#endif /* CONFIG_NFS_V3_ACL */ + /* * linux/fs/mount_clnt.c * (Used only by nfsroot module) diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index fc51645d61ee..3d3a305488cf 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -10,6 +10,7 @@ struct nfs_server { struct rpc_clnt * client; /* RPC client handle */ struct rpc_clnt * 
client_sys; /* 2nd handle for FSINFO */ + struct rpc_clnt * client_acl; /* ACL RPC client handle */ struct nfs_rpc_ops * rpc_ops; /* NFS protocol vector */ struct backing_dev_info backing_dev_info; int flags; /* various flags */ diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h index 0071428231f9..659c75438454 100644 --- a/include/linux/nfs_mount.h +++ b/include/linux/nfs_mount.h @@ -58,6 +58,7 @@ struct nfs_mount_data { #define NFS_MOUNT_KERBEROS 0x0100 /* 3 */ #define NFS_MOUNT_NONLM 0x0200 /* 3 */ #define NFS_MOUNT_BROKEN_SUID 0x0400 /* 4 */ +#define NFS_MOUNT_NOACL 0x0800 /* 4 */ #define NFS_MOUNT_STRICTLOCK 0x1000 /* reserved for NFSv4 */ #define NFS_MOUNT_SECFLAVOUR 0x2000 /* 5 */ #define NFS_MOUNT_FLAGMASK 0xFFFF diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 46b206b460c0..a2bf6914ff1b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -2,6 +2,7 @@ #define _LINUX_NFS_XDR_H #include +#include struct nfs4_fsid { __u64 major; @@ -368,6 +369,20 @@ struct nfs_readdirargs { struct page ** pages; }; +struct nfs3_getaclargs { + struct nfs_fh * fh; + int mask; + struct page ** pages; +}; + +struct nfs3_setaclargs { + struct inode * inode; + int mask; + struct posix_acl * acl_access; + struct posix_acl * acl_default; + struct page ** pages; +}; + struct nfs_diropok { struct nfs_fh * fh; struct nfs_fattr * fattr; @@ -491,6 +506,15 @@ struct nfs3_readdirres { int plus; }; +struct nfs3_getaclres { + struct nfs_fattr * fattr; + int mask; + unsigned int acl_access_count; + unsigned int acl_default_count; + struct posix_acl * acl_access; + struct posix_acl * acl_default; +}; + #ifdef CONFIG_NFS_V4 typedef u64 clientid4; @@ -748,4 +772,7 @@ extern struct rpc_version nfs_version2; extern struct rpc_version nfs_version3; extern struct rpc_version nfs_version4; +extern struct rpc_version nfsacl_version3; +extern struct rpc_program nfsacl_program; + #endif -- cgit v1.2.3-59-g8ed1b From 
055ffbea0596942579b0dae71d5dab78de8135f6 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 22 Jun 2005 17:16:27 +0000 Subject: [PATCH] NFS: Fix handling of the umask when an NFSv3 default acl is present. NFSv3 has no concept of a umask on the server side: The client applies the umask locally, and sends the effective permissions to the server. This behavior is wrong when files are created in a directory that has a default ACL. In this case, the umask is supposed to be ignored, and only the default ACL determines the file's effective permissions. Usually it's the server's task to conditionally apply the umask. But since the server knows nothing about the umask, we have to do it on the client side. This patch tries to fetch the parent directory's default ACL before creating a new file, computes the appropriate create mode to send to the server, and finally sets the new file's access and default acl appropriately. Many thanks to Buck Huppmann for sending the initial version of this patch, as well as for arguing why we need this change. Signed-off-by: Andreas Gruenbacher Acked-by: Olaf Kirch Signed-off-by: Andrew Morton Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 5 +++++ fs/nfs/nfs3acl.c | 29 +++++++++++++++++++++++++++++ fs/nfs/nfs3proc.c | 36 ++++++++++++++++++++++++++++++------ include/linux/nfs_fs.h | 9 +++++++++ 4 files changed, 73 insertions(+), 6 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 440b9cbb6f81..50a03f1504a1 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -490,6 +490,11 @@ nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent) #else server->flags &= ~NFS_MOUNT_NOACL; #endif /* CONFIG_NFS_V3_ACL */ + /* + * The VFS shouldn't apply the umask to mode bits. We will + * do so ourselves when necessary.
+ */ + sb->s_flags |= MS_POSIXACL; if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) server->namelen = NFS3_MAXNAMLEN; sb->s_time_gran = 1; diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 393ba79fc14f..89b6468700e7 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -301,3 +301,32 @@ int nfs3_proc_setacl(struct inode *inode, int type, struct posix_acl *acl) fail: return PTR_ERR(alloc); } + +int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode, + mode_t mode) +{ + struct posix_acl *dfacl, *acl; + int error = 0; + + dfacl = nfs3_proc_getacl(dir, ACL_TYPE_DEFAULT); + if (IS_ERR(dfacl)) { + error = PTR_ERR(dfacl); + return (error == -EOPNOTSUPP) ? 0 : error; + } + if (!dfacl) + return 0; + acl = posix_acl_clone(dfacl, GFP_KERNEL); + error = -ENOMEM; + if (!acl) + goto out_release_dfacl; + error = posix_acl_create_masq(acl, &mode); + if (error < 0) + goto out_release_acl; + error = nfs3_proc_setacls(inode, acl, S_ISDIR(inode->i_mode) ? + dfacl : NULL); +out_release_acl: + posix_acl_release(acl); +out_release_dfacl: + posix_acl_release(dfacl); + return error; +} diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index d03bac0cc42f..a9ddc196224d 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -314,7 +314,8 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, .fh = &fhandle, .fattr = &fattr }; - int status; + mode_t mode = sattr->ia_mode; + int status; dprintk("NFS call create %s\n", dentry->d_name.name); arg.createmode = NFS3_CREATE_UNCHECKED; @@ -324,6 +325,8 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, arg.verifier[1] = current->pid; } + sattr->ia_mode &= ~current->fs->umask; + again: dir_attr.valid = 0; fattr.valid = 0; @@ -370,6 +373,9 @@ again: nfs_refresh_inode(dentry->d_inode, &fattr); dprintk("NFS reply setattr (post-create): %d\n", status); } + if (status != 0) + goto out; + status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); out: 
dprintk("NFS reply create: %d\n", status); return status; @@ -539,15 +545,24 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) .fh = &fhandle, .fattr = &fattr }; - int status; + int mode = sattr->ia_mode; + int status; dprintk("NFS call mkdir %s\n", dentry->d_name.name); dir_attr.valid = 0; fattr.valid = 0; + + sattr->ia_mode &= ~current->fs->umask; + status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0); nfs_refresh_inode(dir, &dir_attr); - if (status == 0) - status = nfs_instantiate(dentry, &fhandle, &fattr); + if (status != 0) + goto out; + status = nfs_instantiate(dentry, &fhandle, &fattr); + if (status != 0) + goto out; + status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); +out: dprintk("NFS reply mkdir: %d\n", status); return status; } @@ -642,6 +657,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, .fh = &fh, .fattr = &fattr }; + mode_t mode = sattr->ia_mode; int status; switch (sattr->ia_mode & S_IFMT) { @@ -654,12 +670,20 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name, MAJOR(rdev), MINOR(rdev)); + + sattr->ia_mode &= ~current->fs->umask; + dir_attr.valid = 0; fattr.valid = 0; status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0); nfs_refresh_inode(dir, &dir_attr); - if (status == 0) - status = nfs_instantiate(dentry, &fh, &fattr); + if (status != 0) + goto out; + status = nfs_instantiate(dentry, &fh, &fattr); + if (status != 0) + goto out; + status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); +out: dprintk("NFS reply mknod: %d\n", status); return status; } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 3a5e442ac776..7662c5131b47 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -478,6 +478,15 @@ extern void nfs_readdata_release(struct rpc_task *task); extern struct posix_acl *nfs3_proc_getacl(struct inode *inode, int 
type); extern int nfs3_proc_setacl(struct inode *inode, int type, struct posix_acl *acl); +extern int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode, + mode_t mode); +#else +static inline int nfs3_proc_set_default_acl(struct inode *dir, + struct inode *inode, + mode_t mode) +{ + return 0; +} #endif /* CONFIG_NFS_V3_ACL */ /* -- cgit v1.2.3-59-g8ed1b From 5c6a9f7d92291c832d47e792ed1fafa44acb066e Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 22 Jun 2005 17:16:27 +0000 Subject: [PATCH] NFS: Cache the NFSv3 acls. Attach acls to inodes in the icache to avoid unnecessary GETACL RPC round-trips. As long as the client doesn't retrieve any acls itself, only the default acls of existing directories and the default and access acls of new directories will end up in the cache, which preserves some memory compared to always caching the access and default acl of all files. Signed-off-by: Andreas Gruenbacher Acked-by: Olaf Kirch Signed-off-by: Andrew Morton Signed-off-by: Trond Myklebust --- fs/nfs/nfs3acl.c | 100 +++++++++++++++++++++++++++++++++++++++++-------- fs/nfs/nfs3proc.c | 1 + include/linux/nfs_fs.h | 11 ++++++ 3 files changed, 97 insertions(+), 15 deletions(-) diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 89b6468700e7..451112ff9aa4 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -113,6 +113,69 @@ int nfs3_removexattr(struct dentry *dentry, const char *name) return nfs3_proc_setacl(inode, type, NULL); } +static void __nfs3_forget_cached_acls(struct nfs_inode *nfsi) +{ + if (nfsi->acl_access != ERR_PTR(-EAGAIN)) { + posix_acl_release(nfsi->acl_access); + nfsi->acl_access = ERR_PTR(-EAGAIN); + } + if (nfsi->acl_default != ERR_PTR(-EAGAIN)) { + posix_acl_release(nfsi->acl_default); + nfsi->acl_default = ERR_PTR(-EAGAIN); + } +} + +void nfs3_forget_cached_acls(struct inode *inode) +{ + dprintk("NFS: nfs3_forget_cached_acls(%s/%ld)\n", inode->i_sb->s_id, + inode->i_ino); + spin_lock(&inode->i_lock); + 
__nfs3_forget_cached_acls(NFS_I(inode)); + spin_unlock(&inode->i_lock); +} + +static struct posix_acl *nfs3_get_cached_acl(struct inode *inode, int type) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct posix_acl *acl = ERR_PTR(-EAGAIN); + + spin_lock(&inode->i_lock); + switch(type) { + case ACL_TYPE_ACCESS: + acl = nfsi->acl_access; + break; + + case ACL_TYPE_DEFAULT: + acl = nfsi->acl_default; + break; + + default: + return ERR_PTR(-EINVAL); + } + if (acl == ERR_PTR(-EAGAIN)) + acl = ERR_PTR(-EAGAIN); + else + acl = posix_acl_dup(acl); + spin_unlock(&inode->i_lock); + dprintk("NFS: nfs3_get_cached_acl(%s/%ld, %d) = %p\n", inode->i_sb->s_id, + inode->i_ino, type, acl); + return acl; +} + +static void nfs3_cache_acls(struct inode *inode, struct posix_acl *acl, + struct posix_acl *dfacl) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + dprintk("nfs3_cache_acls(%s/%ld, %p, %p)\n", inode->i_sb->s_id, + inode->i_ino, acl, dfacl); + spin_lock(&inode->i_lock); + __nfs3_forget_cached_acls(NFS_I(inode)); + nfsi->acl_access = posix_acl_dup(acl); + nfsi->acl_default = posix_acl_dup(dfacl); + spin_unlock(&inode->i_lock); +} + struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) { struct nfs_server *server = NFS_SERVER(inode); @@ -126,26 +189,32 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) struct nfs3_getaclres res = { .fattr = &fattr, }; - struct posix_acl *acl = NULL; + struct posix_acl *acl; int status, count; if (!nfs_server_capable(inode, NFS_CAP_ACLS)) return ERR_PTR(-EOPNOTSUPP); - switch (type) { - case ACL_TYPE_ACCESS: - args.mask = NFS_ACLCNT|NFS_ACL; - break; - - case ACL_TYPE_DEFAULT: - if (!S_ISDIR(inode->i_mode)) - return NULL; - args.mask = NFS_DFACLCNT|NFS_DFACL; - break; - - default: - return ERR_PTR(-EINVAL); - } + status = nfs_revalidate_inode(server, inode); + if (status < 0) + return ERR_PTR(status); + acl = nfs3_get_cached_acl(inode, type); + if (acl != ERR_PTR(-EAGAIN)) + return acl; + acl = NULL; + + /* + * Only 
get the access acl when explicitly requested: We don't + * need it for access decisions, and only some applications use + * it. Applications which request the access acl first are not + * penalized from this optimization. + */ + if (type == ACL_TYPE_ACCESS) + args.mask |= NFS_ACLCNT|NFS_ACL; + if (S_ISDIR(inode->i_mode)) + args.mask |= NFS_DFACLCNT|NFS_DFACL; + if (args.mask == 0) + return NULL; dprintk("NFS call getacl\n"); status = rpc_call(server->client_acl, ACLPROC3_GETACL, @@ -180,6 +249,7 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) res.acl_access = NULL; } } + nfs3_cache_acls(inode, res.acl_access, res.acl_default); switch(type) { case ACL_TYPE_ACCESS: diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index a9ddc196224d..7851569b31c6 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -882,4 +882,5 @@ struct nfs_rpc_ops nfs_v3_clientops = { .file_open = nfs_open, .file_release = nfs_release, .lock = nfs3_proc_lock, + .clear_acl_cache = nfs3_forget_cached_acls, }; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 7662c5131b47..4ceac9ddac93 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -91,6 +91,8 @@ struct nfs_open_context { */ struct nfs_delegation; +struct posix_acl; + /* * nfs fs inode data in memory */ @@ -144,6 +146,10 @@ struct nfs_inode { atomic_t data_updates; struct nfs_access_entry cache_access; +#ifdef CONFIG_NFS_V3_ACL + struct posix_acl *acl_access; + struct posix_acl *acl_default; +#endif /* * This is the cookie verifier used for NFSv3 readdir @@ -480,6 +486,7 @@ extern int nfs3_proc_setacl(struct inode *inode, int type, struct posix_acl *acl); extern int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode, mode_t mode); +extern void nfs3_forget_cached_acls(struct inode *inode); #else static inline int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode, @@ -487,6 +494,10 @@ static inline int nfs3_proc_set_default_acl(struct inode *dir, { return 0; } 
+ +static inline void nfs3_forget_cached_acls(struct inode *inode) +{ +} #endif /* CONFIG_NFS_V3_ACL */ /* -- cgit v1.2.3-59-g8ed1b From 458818ed76d3f495f9f32373c936456c9427f759 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Jun 2005 17:16:27 +0000 Subject: [PATCH] NFS: Fix up v3 ACL caching code Initialize the inode cache values correctly. Clean up __nfs3_forget_cached_acls() Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 4 ++++ fs/nfs/nfs3acl.c | 11 ++++++----- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 50a03f1504a1..8a8d57d9d660 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1961,6 +1961,10 @@ static struct inode *nfs_alloc_inode(struct super_block *sb) if (!nfsi) return NULL; nfsi->flags = 0; +#ifdef CONFIG_NFS_V3_ACL + nfsi->acl_access = ERR_PTR(-EAGAIN); + nfsi->acl_default = ERR_PTR(-EAGAIN); +#endif #ifdef CONFIG_NFS_V4 nfsi->nfs4_acl = NULL; #endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 451112ff9aa4..ee3536fc84a3 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -115,11 +115,11 @@ int nfs3_removexattr(struct dentry *dentry, const char *name) static void __nfs3_forget_cached_acls(struct nfs_inode *nfsi) { - if (nfsi->acl_access != ERR_PTR(-EAGAIN)) { + if (!IS_ERR(nfsi->acl_access)) { posix_acl_release(nfsi->acl_access); nfsi->acl_access = ERR_PTR(-EAGAIN); } - if (nfsi->acl_default != ERR_PTR(-EAGAIN)) { + if (!IS_ERR(nfsi->acl_default)) { posix_acl_release(nfsi->acl_default); nfsi->acl_default = ERR_PTR(-EAGAIN); } @@ -137,7 +137,7 @@ void nfs3_forget_cached_acls(struct inode *inode) static struct posix_acl *nfs3_get_cached_acl(struct inode *inode, int type) { struct nfs_inode *nfsi = NFS_I(inode); - struct posix_acl *acl = ERR_PTR(-EAGAIN); + struct posix_acl *acl = ERR_PTR(-EINVAL); spin_lock(&inode->i_lock); switch(type) { @@ -150,12 +150,13 @@ static struct posix_acl *nfs3_get_cached_acl(struct inode *inode, int type) break; 
default: - return ERR_PTR(-EINVAL); + goto out; } - if (acl == ERR_PTR(-EAGAIN)) + if (IS_ERR(acl)) acl = ERR_PTR(-EAGAIN); else acl = posix_acl_dup(acl); +out: spin_unlock(&inode->i_lock); dprintk("NFS: nfs3_get_cached_acl(%s/%ld, %d) = %p\n", inode->i_sb->s_id, inode->i_ino, type, acl); -- cgit v1.2.3-59-g8ed1b From 213484254c65e3c39c59df454132748b1367f816 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 22 Jun 2005 17:16:28 +0000 Subject: [PATCH] fix nfsacl pointer arithmetic and pg_class initialization bugs * Pointer arithmetic bug: p is in word units. This fixes a memory corruption with big acls. * Initialize pg_class to prevent a NULL pointer access. Signed-off-by: Andreas Gruenbacher Signed-off-by: Trond Myklebust --- fs/nfs/nfs3xdr.c | 2 +- fs/nfsd/nfssvc.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index a4437fb177f0..db4a904810a4 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -677,7 +677,7 @@ nfs3_xdr_setaclargs(struct rpc_rqst *req, u32 *p, /* put as much of the acls into head as possible. 
*/ len_in_head = min_t(unsigned int, buf->head->iov_len - base, len); len -= len_in_head; - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p + len_in_head); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p + (len_in_head >> 2)); for (count = 0; (count << PAGE_SHIFT) < len; count++) { args->pages[count] = alloc_page(GFP_KERNEL); diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 79b25b19fec8..904df604e86b 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -376,6 +376,7 @@ static struct svc_program nfsd_acl_program = { .pg_nvers = NFSD_ACL_NRVERS, .pg_vers = nfsd_acl_version, .pg_name = "nfsd", + .pg_class = "nfsd", .pg_stats = &nfsd_acl_svcstats, }; -- cgit v1.2.3-59-g8ed1b From b7ef19560f496fd3942e41e728950e5b5c9a461b Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 22 Jun 2005 17:16:28 +0000 Subject: [PATCH] NFSv4: fs/nfs/nfs4proc.c: small simplification The Coverity checker noticed that such a simplification was possible. Signed-off-by: Adrian Bunk Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 128d01cfea19..7ff23707256a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -123,7 +123,7 @@ static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry, BUG_ON(readdir->count < 80); if (cookie > 2) { - readdir->cookie = (cookie > 2) ? cookie : 0; + readdir->cookie = cookie; memcpy(&readdir->verifier, verifier, sizeof(readdir->verifier)); return; } -- cgit v1.2.3-59-g8ed1b From 3e9d41543b16e6117267edc0ca058c40f888d81a Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 22 Jun 2005 17:16:28 +0000 Subject: [PATCH] NFSv4: empty array fix Older gcc's don't like this. 
fs/nfs/nfs4proc.c:2194: field `data' has incomplete type Signed-off-by: Andrew Morton Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7ff23707256a..3f281a857e38 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2191,7 +2191,7 @@ static void buf_to_pages(const void *buf, size_t buflen, struct nfs4_cached_acl { int cached; size_t len; - char data[]; + char data[0]; }; static void nfs4_set_cached_acl(struct inode *inode, struct nfs4_cached_acl *acl) -- cgit v1.2.3-59-g8ed1b From c56c2750229154f6a1cfee533e0a911da3923b5a Mon Sep 17 00:00:00 2001 From: Reuben Farrelly Date: Wed, 22 Jun 2005 17:16:28 +0000 Subject: [PATCH] NFSv4: Fix build warning From: Reuben Farrelly With gcc-4.0: fs/nfs/nfs4proc.c:2976: error: static declaration of 'nfs4_file_inode_operations' follows non-static declaration fs/nfs/nfs4_fs.h:179: error: previous declaration of 'nfs4_file_inode_operations' was here Signed-off-by: Andrew Morton Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index d71f416bd9e5..7c6f1d668fbd 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -176,7 +176,6 @@ struct nfs4_state_recovery_ops { extern struct dentry_operations nfs4_dentry_operations; extern struct inode_operations nfs4_dir_inode_operations; -extern struct inode_operations nfs4_file_inode_operations; /* inode.c */ extern ssize_t nfs4_getxattr(struct dentry *, const char *, void *, size_t); -- cgit v1.2.3-59-g8ed1b From 455a396710b71a743b28da2ed2185e5a9b38e26f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Jun 2005 17:16:28 +0000 Subject: [PATCH] NFSv4: Fix an Oops in the callback code. 
The changeset "trond.myklebust@fys.uio.no|ChangeSet|20050322152404|16979" (RPC: Ensure XDR iovec length is initialized correctly in call_header) causes the NFSv4 callback code to BUG() due to an incorrectly initialized scratch buffer. Signed-off-by: Trond Myklebust --- fs/nfs/callback_xdr.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index c99677ec58f8..7c33b9a81a94 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -411,7 +411,6 @@ static int nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base); p = (uint32_t*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len); - rqstp->rq_res.head[0].iov_len = PAGE_SIZE; xdr_init_encode(&xdr_out, &rqstp->rq_res, p); decode_compound_hdr_arg(&xdr_in, &hdr_arg); -- cgit v1.2.3-59-g8ed1b From 14b218a8e4f110206c46e586a3da372f665631e7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Jun 2005 17:16:28 +0000 Subject: [PATCH] RPC: Ensure rpc calls respects the RPC_NOINTR flag For internal purposes, the rpc_clnt_sigmask() call is replaced by a call to rpc_task_sigmask(), which ensures that the current task sigmask respects both the client cl_intr flag and the per-task NOINTR flag. Problem noted by Jiaying Zhang. 
Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 71 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 37 insertions(+), 34 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index c979fcf88798..f17e6153b688 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -378,38 +378,41 @@ rpc_default_callback(struct rpc_task *task) } /* - * Export the signal mask handling for aysnchronous code that + * Export the signal mask handling for synchronous code that * sleeps on RPC calls */ +#define RPC_INTR_SIGNALS (sigmask(SIGINT) | sigmask(SIGQUIT) | sigmask(SIGKILL)) +static void rpc_save_sigmask(sigset_t *oldset, int intr) +{ + unsigned long sigallow = 0; + sigset_t sigmask; + + /* Block all signals except those listed in sigallow */ + if (intr) + sigallow |= RPC_INTR_SIGNALS; + siginitsetinv(&sigmask, sigallow); + sigprocmask(SIG_BLOCK, &sigmask, oldset); +} + +static inline void rpc_task_sigmask(struct rpc_task *task, sigset_t *oldset) +{ + rpc_save_sigmask(oldset, !RPC_TASK_UNINTERRUPTIBLE(task)); +} + +static inline void rpc_restore_sigmask(sigset_t *oldset) +{ + sigprocmask(SIG_SETMASK, oldset, NULL); +} + void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset) { - unsigned long sigallow = sigmask(SIGKILL); - unsigned long irqflags; - - /* Turn off various signals */ - if (clnt->cl_intr) { - struct k_sigaction *action = current->sighand->action; - if (action[SIGINT-1].sa.sa_handler == SIG_DFL) - sigallow |= sigmask(SIGINT); - if (action[SIGQUIT-1].sa.sa_handler == SIG_DFL) - sigallow |= sigmask(SIGQUIT); - } - spin_lock_irqsave(&current->sighand->siglock, irqflags); - *oldset = current->blocked; - siginitsetinv(&current->blocked, sigallow & ~oldset->sig[0]); - recalc_sigpending(); - spin_unlock_irqrestore(&current->sighand->siglock, irqflags); + rpc_save_sigmask(oldset, clnt->cl_intr); } void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset) { - unsigned long irqflags; - - spin_lock_irqsave(&current->sighand->siglock, irqflags); - 
current->blocked = *oldset; - recalc_sigpending(); - spin_unlock_irqrestore(&current->sighand->siglock, irqflags); + rpc_restore_sigmask(oldset); } /* @@ -427,26 +430,26 @@ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) BUG_ON(flags & RPC_TASK_ASYNC); - rpc_clnt_sigmask(clnt, &oldset); - status = -ENOMEM; task = rpc_new_task(clnt, NULL, flags); if (task == NULL) goto out; + /* Mask signals on RPC calls _and_ GSS_AUTH upcalls */ + rpc_task_sigmask(task, &oldset); + rpc_call_setup(task, msg, 0); /* Set up the call info struct and execute the task */ - if (task->tk_status == 0) + if (task->tk_status == 0) { status = rpc_execute(task); - else { + } else { status = task->tk_status; rpc_release_task(task); } + rpc_restore_sigmask(&oldset); out: - rpc_clnt_sigunmask(clnt, &oldset); - return status; } @@ -467,8 +470,6 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags, flags |= RPC_TASK_ASYNC; - rpc_clnt_sigmask(clnt, &oldset); - /* Create/initialize a new RPC task */ if (!callback) callback = rpc_default_callback; @@ -477,6 +478,9 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags, goto out; task->tk_calldata = data; + /* Mask signals on GSS_AUTH upcalls */ + rpc_task_sigmask(task, &oldset); + rpc_call_setup(task, msg, 0); /* Set up the call info struct and execute the task */ @@ -486,9 +490,8 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags, else rpc_release_task(task); + rpc_restore_sigmask(&oldset); out: - rpc_clnt_sigunmask(clnt, &oldset); - return status; } @@ -666,7 +669,7 @@ call_allocate(struct rpc_task *task) return; printk(KERN_INFO "RPC: buffer allocation failed for task %p\n", task); - if (RPC_IS_ASYNC(task) || !(task->tk_client->cl_intr && signalled())) { + if (RPC_IS_ASYNC(task) || !signalled()) { xprt_release(task); task->tk_action = call_reserve; rpc_delay(task, HZ>>4); -- cgit v1.2.3-59-g8ed1b From 0f9dc2b16884bb5957d010ed8e9114e771a05916 Mon Sep 17 
00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Jun 2005 17:16:28 +0000 Subject: [PATCH] RPC: Clean up socket autodisconnect Cancel autodisconnect requests inside xprt_transmit() in order to avoid races. Use more efficient del_singleshot_timer_sync() Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index ef941e7de8bf..a74a1289113e 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1240,6 +1240,8 @@ xprt_transmit(struct rpc_task *task) list_add_tail(&req->rq_list, &xprt->recv); spin_unlock_bh(&xprt->sock_lock); xprt_reset_majortimeo(req); + /* Turn off autodisconnect */ + del_singleshot_timer_sync(&xprt->timer); } } else if (!req->rq_bytes_sent) return; @@ -1370,8 +1372,6 @@ xprt_reserve(struct rpc_task *task) spin_lock(&xprt->xprt_lock); do_xprt_reserve(task); spin_unlock(&xprt->xprt_lock); - if (task->tk_rqstp) - del_timer_sync(&xprt->timer); } } -- cgit v1.2.3-59-g8ed1b From 20e5ac828dfd23b9080159c62a34f32d2dcd92fc Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 22 Jun 2005 17:16:28 +0000 Subject: [PATCH] RPC: TCP reconnects are too slow When the network layer reports a connection close, the RPC task waiting to reconnect should be notified so it can retry immediately instead of waiting for the normal connection establishment timeout. This reverts a change made in 2.6.6 as part of adding client support for RPC over TCP socket idle timeouts. Test-plan: Destructive testing with NFS over TCP mounts. 
Version: Fri, 29 Apr 2005 15:31:46 -0400 Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index a74a1289113e..2b8789cf8db1 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1101,8 +1101,7 @@ tcp_state_change(struct sock *sk) case TCP_SYN_RECV: break; default: - if (xprt_test_and_clear_connected(xprt)) - rpc_wake_up_status(&xprt->pending, -ENOTCONN); + xprt_disconnect(xprt); break; } out: -- cgit v1.2.3-59-g8ed1b From ae3884621bf5b4caff7785b9a417f262202965b2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 22 Jun 2005 17:16:28 +0000 Subject: [PATCH] RPC: kick off socket connect operations faster Make the socket transport kick the event queue to start socket connects immediately. This should improve responsiveness of applications that are sensitive to slow mount operations (like automounters). We are now also careful to cancel the connect worker before destroying the xprt. This eliminates a race where xprt_destroy can finish before the connect worker is even allowed to run. Test-plan: Destructive testing (unplugging the network temporarily). Connectathon with UDP and TCP. Hard-code impossibly small connect timeout. 
Version: Fri, 29 Apr 2005 15:32:01 -0400 Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 2b8789cf8db1..eca92405948f 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -569,8 +569,11 @@ void xprt_connect(struct rpc_task *task) if (xprt->sock != NULL) schedule_delayed_work(&xprt->sock_connect, RPC_REESTABLISH_TIMEOUT); - else + else { schedule_work(&xprt->sock_connect); + if (!RPC_IS_ASYNC(task)) + flush_scheduled_work(); + } } return; out_write: @@ -1685,6 +1688,10 @@ xprt_shutdown(struct rpc_xprt *xprt) rpc_wake_up(&xprt->backlog); wake_up(&xprt->cong_wait); del_timer_sync(&xprt->timer); + + /* synchronously wait for connect worker to finish */ + cancel_delayed_work(&xprt->sock_connect); + flush_scheduled_work(); } /* -- cgit v1.2.3-59-g8ed1b From 00a926422765064cb28e218d4837411c88bf6a3e Mon Sep 17 00:00:00 2001 From: Olivier Galibert Date: Wed, 22 Jun 2005 17:16:29 +0000 Subject: [PATCH] NFS: Hide NFS server-generated readdir cookies from userland NFSv3 currently returns the unsigned 64-bit cookie directly to userspace. The following patch causes the kernel to generate loff_t offsets for the benefit of userland. The current server-generated READDIR cookie is cached in the nfs_open_context instead of in filp->f_pos, so we still end up working correctly under directory insertions/deletions. 
Signed-off-by: Olivier Galibert Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 114 ++++++++++++++++++++++++++++++++++++++----------- fs/nfs/inode.c | 2 + include/linux/nfs_fs.h | 3 ++ 3 files changed, 95 insertions(+), 24 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 2c6a95945684..fceef29c65a3 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -141,7 +141,9 @@ typedef struct { struct page *page; unsigned long page_index; u32 *ptr; - u64 target; + u64 target_cookie; + int target_index; + int current_index; struct nfs_entry *entry; decode_dirent_t decode; int plus; @@ -225,14 +227,14 @@ void dir_page_release(nfs_readdir_descriptor_t *desc) /* * Given a pointer to a buffer that has already been filled by a call - * to readdir, find the next entry. + * to readdir, find the next entry with cookie 'desc->target_cookie'. * * If the end of the buffer has been reached, return -EAGAIN, if not, * return the offset within the buffer of the next entry to be * read. */ static inline -int find_dirent(nfs_readdir_descriptor_t *desc, struct page *page) +int find_dirent(nfs_readdir_descriptor_t *desc) { struct nfs_entry *entry = desc->entry; int loop_count = 0, @@ -240,7 +242,7 @@ int find_dirent(nfs_readdir_descriptor_t *desc, struct page *page) while((status = dir_decode(desc)) == 0) { dfprintk(VFS, "NFS: found cookie %Lu\n", (long long)entry->cookie); - if (entry->prev_cookie == desc->target) + if (entry->prev_cookie == desc->target_cookie) break; if (loop_count++ > 200) { loop_count = 0; @@ -252,8 +254,44 @@ int find_dirent(nfs_readdir_descriptor_t *desc, struct page *page) } /* - * Find the given page, and call find_dirent() in order to try to - * return the next entry. + * Given a pointer to a buffer that has already been filled by a call + * to readdir, find the entry at offset 'desc->target_index'. + * + * If the end of the buffer has been reached, return -EAGAIN, if not, + * return the offset within the buffer of the next entry to be + * read. 
+ */ +static inline +int find_dirent_index(nfs_readdir_descriptor_t *desc) +{ + struct nfs_entry *entry = desc->entry; + int loop_count = 0, + status; + + for(;;) { + status = dir_decode(desc); + if (status) + break; + + dfprintk(VFS, "NFS: found cookie %Lu at index %d\n", (long long)entry->cookie, desc->current_index); + + if (desc->target_index == desc->current_index) { + desc->target_cookie = entry->cookie; + break; + } + desc->current_index++; + if (loop_count++ > 200) { + loop_count = 0; + schedule(); + } + } + dfprintk(VFS, "NFS: find_dirent_index() returns %d\n", status); + return status; +} + +/* + * Find the given page, and call find_dirent() or find_dirent_index in + * order to try to return the next entry. */ static inline int find_dirent_page(nfs_readdir_descriptor_t *desc) @@ -276,7 +314,10 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc) /* NOTE: Someone else may have changed the READDIRPLUS flag */ desc->page = page; desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ - status = find_dirent(desc, page); + if (desc->target_cookie) + status = find_dirent(desc); + else + status = find_dirent_index(desc); if (status < 0) dir_page_release(desc); out: @@ -291,7 +332,8 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc) * Recurse through the page cache pages, and return a * filled nfs_entry structure of the next directory entry if possible. * - * The target for the search is 'desc->target'. 
+ * The target for the search is 'desc->target_cookie' if non-0, + * 'desc->target_index' otherwise */ static inline int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) @@ -299,7 +341,19 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) int loop_count = 0; int res; - dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (long long)desc->target); + if (desc->target_cookie) + dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (long long)desc->target_cookie); + else + dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie number %d\n", desc->target_index); + + /* Always search-by-index from the beginning of the cache */ + if (!(desc->target_cookie)) { + desc->page_index = 0; + desc->entry->cookie = desc->entry->prev_cookie = 0; + desc->entry->eof = 0; + desc->current_index = 0; + } + for (;;) { res = find_dirent_page(desc); if (res != -EAGAIN) @@ -332,11 +386,12 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, struct file *file = desc->file; struct nfs_entry *entry = desc->entry; struct dentry *dentry = NULL; + struct nfs_open_context *ctx = file->private_data; unsigned long fileid; int loop_count = 0, res; - dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)desc->target); + dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)entry->cookie); for(;;) { unsigned d_type = DT_UNKNOWN; @@ -356,10 +411,11 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, } res = filldir(dirent, entry->name, entry->len, - entry->prev_cookie, fileid, d_type); + file->f_pos, fileid, d_type); if (res < 0) break; - file->f_pos = desc->target = entry->cookie; + file->f_pos++; + desc->target_cookie = entry->cookie; if (dir_decode(desc) != 0) { desc->page_index ++; break; @@ -369,10 +425,12 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, schedule(); } } + ctx->dir_pos = file->f_pos; + ctx->dir_cookie 
= desc->target_cookie; dir_page_release(desc); if (dentry != NULL) dput(dentry); - dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (long long)desc->target, res); + dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (long long)desc->target_cookie, res); return res; } @@ -398,14 +456,14 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, struct page *page = NULL; int status; - dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (long long)desc->target); + dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (long long)desc->target_cookie); page = alloc_page(GFP_HIGHUSER); if (!page) { status = -ENOMEM; goto out; } - desc->error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, desc->target, + desc->error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, desc->target_cookie, page, NFS_SERVER(inode)->dtsize, desc->plus); @@ -414,7 +472,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ if (desc->error >= 0) { if ((status = dir_decode(desc)) == 0) - desc->entry->prev_cookie = desc->target; + desc->entry->prev_cookie = desc->target_cookie; } else status = -EIO; if (status < 0) @@ -435,13 +493,15 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, goto out; } -/* The file offset position is now represented as a true offset into the - * page cache as is the case in most of the other filesystems. +/* The file offset position represents the dirent entry number. A + last cookie cache takes care of the common case of reading the + whole directory. 
*/ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { struct dentry *dentry = filp->f_dentry; struct inode *inode = dentry->d_inode; + struct nfs_open_context *ctx = filp->private_data; nfs_readdir_descriptor_t my_desc, *desc = &my_desc; struct nfs_entry my_entry; @@ -458,17 +518,22 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) } /* - * filp->f_pos points to the file offset in the page cache. - * but if the cache has meanwhile been zapped, we need to - * read from the last dirent to revalidate f_pos - * itself. + * filp->f_pos points to the dirent entry number. + * ctx->dir_pos has the number of the cached cookie. We have + * to either find the entry with the appropriate number or + * revalidate the cookie. */ memset(desc, 0, sizeof(*desc)); desc->file = filp; - desc->target = filp->f_pos; desc->decode = NFS_PROTO(inode)->decode_dirent; desc->plus = NFS_USE_READDIRPLUS(inode); + desc->target_index = filp->f_pos; + + if (filp->f_pos == ctx->dir_pos) + desc->target_cookie = ctx->dir_cookie; + else + desc->target_cookie = 0; my_entry.cookie = my_entry.prev_cookie = 0; my_entry.eof = 0; @@ -478,9 +543,10 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) while(!desc->entry->eof) { res = readdir_search_pagecache(desc); + if (res == -EBADCOOKIE) { /* This means either end of directory */ - if (desc->entry->cookie != desc->target) { + if (desc->target_cookie && desc->entry->cookie != desc->target_cookie) { /* Or that the server has 'lost' a cookie */ res = uncached_readdir(desc, dirent, filldir); if (res >= 0) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 8a8d57d9d660..9fa02e7984ac 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -891,6 +891,8 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rp ctx->state = NULL; ctx->lockowner = current->files; ctx->error = 0; + ctx->dir_pos = 0; + ctx->dir_cookie = 0; } return ctx; } diff --git 
a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 4ceac9ddac93..f810195ef7ad 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -84,6 +84,9 @@ struct nfs_open_context { int error; struct list_head list; + + int dir_pos; /* Directory cookie cache */ + __u64 dir_cookie; }; /* -- cgit v1.2.3-59-g8ed1b From f0dd2136da6d2070e12bfa6d199b136318e666c7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Jun 2005 17:16:29 +0000 Subject: [PATCH] NFS: Clean up readdir changes. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 85 ++++++++++++++++++++++++++++---------------------- fs/nfs/inode.c | 1 - include/linux/nfs_fs.h | 1 - 3 files changed, 48 insertions(+), 39 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index fceef29c65a3..b38a57e78a63 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -51,8 +51,10 @@ static int nfs_mknod(struct inode *, struct dentry *, int, dev_t); static int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); static int nfs_fsync_dir(struct file *, struct dentry *, int); +static loff_t nfs_llseek_dir(struct file *, loff_t, int); struct file_operations nfs_dir_operations = { + .llseek = nfs_llseek_dir, .read = generic_read_dir, .readdir = nfs_readdir, .open = nfs_opendir, @@ -141,9 +143,8 @@ typedef struct { struct page *page; unsigned long page_index; u32 *ptr; - u64 target_cookie; - int target_index; - int current_index; + u64 *dir_cookie; + loff_t current_index; struct nfs_entry *entry; decode_dirent_t decode; int plus; @@ -227,7 +228,7 @@ void dir_page_release(nfs_readdir_descriptor_t *desc) /* * Given a pointer to a buffer that has already been filled by a call - * to readdir, find the next entry with cookie 'desc->target_cookie'. + * to readdir, find the next entry with cookie '*desc->dir_cookie'. 
* * If the end of the buffer has been reached, return -EAGAIN, if not, * return the offset within the buffer of the next entry to be @@ -241,8 +242,8 @@ int find_dirent(nfs_readdir_descriptor_t *desc) status; while((status = dir_decode(desc)) == 0) { - dfprintk(VFS, "NFS: found cookie %Lu\n", (long long)entry->cookie); - if (entry->prev_cookie == desc->target_cookie) + dfprintk(VFS, "NFS: found cookie %Lu\n", (unsigned long long)entry->cookie); + if (entry->prev_cookie == *desc->dir_cookie) break; if (loop_count++ > 200) { loop_count = 0; @@ -255,7 +256,7 @@ int find_dirent(nfs_readdir_descriptor_t *desc) /* * Given a pointer to a buffer that has already been filled by a call - * to readdir, find the entry at offset 'desc->target_index'. + * to readdir, find the entry at offset 'desc->file->f_pos'. * * If the end of the buffer has been reached, return -EAGAIN, if not, * return the offset within the buffer of the next entry to be @@ -273,10 +274,10 @@ int find_dirent_index(nfs_readdir_descriptor_t *desc) if (status) break; - dfprintk(VFS, "NFS: found cookie %Lu at index %d\n", (long long)entry->cookie, desc->current_index); + dfprintk(VFS, "NFS: found cookie %Lu at index %Ld\n", (unsigned long long)entry->cookie, desc->current_index); - if (desc->target_index == desc->current_index) { - desc->target_cookie = entry->cookie; + if (desc->file->f_pos == desc->current_index) { + *desc->dir_cookie = entry->cookie; break; } desc->current_index++; @@ -314,7 +315,7 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc) /* NOTE: Someone else may have changed the READDIRPLUS flag */ desc->page = page; desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ - if (desc->target_cookie) + if (*desc->dir_cookie != 0) status = find_dirent(desc); else status = find_dirent_index(desc); @@ -332,8 +333,8 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc) * Recurse through the page cache pages, and return a * filled nfs_entry structure of the next directory entry if 
possible. * - * The target for the search is 'desc->target_cookie' if non-0, - * 'desc->target_index' otherwise + * The target for the search is '*desc->dir_cookie' if non-0, + * 'desc->file->f_pos' otherwise */ static inline int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) @@ -341,18 +342,15 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) int loop_count = 0; int res; - if (desc->target_cookie) - dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (long long)desc->target_cookie); - else - dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie number %d\n", desc->target_index); - /* Always search-by-index from the beginning of the cache */ - if (!(desc->target_cookie)) { + if (*desc->dir_cookie == 0) { + dfprintk(VFS, "NFS: readdir_search_pagecache() searching for offset %Ld\n", (long long)desc->file->f_pos); desc->page_index = 0; desc->entry->cookie = desc->entry->prev_cookie = 0; desc->entry->eof = 0; desc->current_index = 0; - } + } else + dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie); for (;;) { res = find_dirent_page(desc); @@ -386,7 +384,6 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, struct file *file = desc->file; struct nfs_entry *entry = desc->entry; struct dentry *dentry = NULL; - struct nfs_open_context *ctx = file->private_data; unsigned long fileid; int loop_count = 0, res; @@ -415,7 +412,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, if (res < 0) break; file->f_pos++; - desc->target_cookie = entry->cookie; + *desc->dir_cookie = entry->cookie; if (dir_decode(desc) != 0) { desc->page_index ++; break; @@ -425,12 +422,10 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, schedule(); } } - ctx->dir_pos = file->f_pos; - ctx->dir_cookie = desc->target_cookie; dir_page_release(desc); if (dentry != NULL) dput(dentry); - dfprintk(VFS, "NFS: nfs_do_filldir() filling 
ended @ cookie %Lu; returning = %d\n", (long long)desc->target_cookie, res); + dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (unsigned long long)*desc->dir_cookie, res); return res; } @@ -456,14 +451,14 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, struct page *page = NULL; int status; - dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (long long)desc->target_cookie); + dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie); page = alloc_page(GFP_HIGHUSER); if (!page) { status = -ENOMEM; goto out; } - desc->error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, desc->target_cookie, + desc->error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, *desc->dir_cookie, page, NFS_SERVER(inode)->dtsize, desc->plus); @@ -472,7 +467,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ if (desc->error >= 0) { if ((status = dir_decode(desc)) == 0) - desc->entry->prev_cookie = desc->target_cookie; + desc->entry->prev_cookie = *desc->dir_cookie; } else status = -EIO; if (status < 0) @@ -501,7 +496,6 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { struct dentry *dentry = filp->f_dentry; struct inode *inode = dentry->d_inode; - struct nfs_open_context *ctx = filp->private_data; nfs_readdir_descriptor_t my_desc, *desc = &my_desc; struct nfs_entry my_entry; @@ -519,21 +513,16 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) /* * filp->f_pos points to the dirent entry number. - * ctx->dir_pos has the number of the cached cookie. We have + * *desc->dir_cookie has the cookie for the next entry. We have * to either find the entry with the appropriate number or * revalidate the cookie. 
*/ memset(desc, 0, sizeof(*desc)); desc->file = filp; + desc->dir_cookie = &((struct nfs_open_context *)filp->private_data)->dir_cookie; desc->decode = NFS_PROTO(inode)->decode_dirent; desc->plus = NFS_USE_READDIRPLUS(inode); - desc->target_index = filp->f_pos; - - if (filp->f_pos == ctx->dir_pos) - desc->target_cookie = ctx->dir_cookie; - else - desc->target_cookie = 0; my_entry.cookie = my_entry.prev_cookie = 0; my_entry.eof = 0; @@ -546,7 +535,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (res == -EBADCOOKIE) { /* This means either end of directory */ - if (desc->target_cookie && desc->entry->cookie != desc->target_cookie) { + if (*desc->dir_cookie && desc->entry->cookie != *desc->dir_cookie) { /* Or that the server has 'lost' a cookie */ res = uncached_readdir(desc, dirent, filldir); if (res >= 0) @@ -579,6 +568,28 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) return 0; } +loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) +{ + down(&filp->f_dentry->d_inode->i_sem); + switch (origin) { + case 1: + offset += filp->f_pos; + case 0: + if (offset >= 0) + break; + default: + offset = -EINVAL; + goto out; + } + if (offset != filp->f_pos) { + filp->f_pos = offset; + ((struct nfs_open_context *)filp->private_data)->dir_cookie = 0; + } +out: + up(&filp->f_dentry->d_inode->i_sem); + return offset; +} + /* * All directory operations under NFS are synchronous, so fsync() * is a dummy operation. 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 9fa02e7984ac..6300e05e9463 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -891,7 +891,6 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rp ctx->state = NULL; ctx->lockowner = current->files; ctx->error = 0; - ctx->dir_pos = 0; ctx->dir_cookie = 0; } return ctx; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index f810195ef7ad..c90313bfa435 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -85,7 +85,6 @@ struct nfs_open_context { struct list_head list; - int dir_pos; /* Directory cookie cache */ __u64 dir_cookie; }; -- cgit v1.2.3-59-g8ed1b From 202b50dc127cf4714ffdcc6a64f1648373f9414f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Jun 2005 17:16:29 +0000 Subject: [PATCH] NFSv4: Ensure that we propagate NFSv4 state errors to the reclaim code Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 51 ++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 11 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3f281a857e38..91e7fe867d58 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -269,14 +269,9 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta int err; do { err = _nfs4_open_reclaim(sp, state); - switch (err) { - case 0: - case -NFS4ERR_STALE_CLIENTID: - case -NFS4ERR_STALE_STATEID: - case -NFS4ERR_EXPIRED: - return err; - } - err = nfs4_handle_exception(server, err, &exception); + if (err != -NFS4ERR_DELAY) + break; + nfs4_handle_exception(server, err, &exception); } while (exception.retry); return err; } @@ -508,6 +503,20 @@ out_stale: goto out_nodeleg; } +static inline int nfs4_do_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) +{ + struct nfs_server *server = NFS_SERVER(dentry->d_inode); + struct nfs4_exception exception = { }; + int err; + + do { + err = _nfs4_open_expired(sp, state, dentry); + if 
(err == -NFS4ERR_DELAY) + nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + return err; +} + static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) { struct nfs_inode *nfsi = NFS_I(state->inode); @@ -520,7 +529,7 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta continue; get_nfs_open_context(ctx); spin_unlock(&state->inode->i_lock); - status = _nfs4_open_expired(sp, state, ctx->dentry); + status = nfs4_do_open_expired(sp, state, ctx->dentry); put_nfs_open_context(ctx); return status; } @@ -2842,12 +2851,32 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request) { - return _nfs4_do_setlk(state, F_SETLK, request, 1); + struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs4_exception exception = { }; + int err; + + do { + err = _nfs4_do_setlk(state, F_SETLK, request, 1); + if (err != -NFS4ERR_DELAY) + break; + nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + return err; } static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request) { - return _nfs4_do_setlk(state, F_SETLK, request, 0); + struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs4_exception exception = { }; + int err; + + do { + err = _nfs4_do_setlk(state, F_SETLK, request, 0); + if (err != -NFS4ERR_DELAY) + break; + nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + return err; } static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) -- cgit v1.2.3-59-g8ed1b From 08e9eac42edab63bce14b5c8419771f3c92aa3f4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Jun 2005 17:16:29 +0000 Subject: [PATCH] NFSv4: Fix up races in nfs4_proc_setattr() If we do not hold a valid stateid that is open for writes, there is little point in doing an extra open of the file, as the 
RFC does not appear to mandate this... Make setattr use the correct stateid if we're holding mandatory byte range locks. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 55 +++++++++++++++++++------------------------------------ 1 file changed, 19 insertions(+), 36 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 91e7fe867d58..af80b5981486 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -756,11 +756,10 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, fattr->valid = 0; - if (state != NULL) + if (state != NULL) { msg.rpc_cred = state->owner->so_cred; - if (sattr->ia_valid & ATTR_SIZE) - nfs4_copy_stateid(&arg.stateid, state, NULL); - else + nfs4_copy_stateid(&arg.stateid, state, current->files); + } else memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); return rpc_call_sync(server->client, &msg, 0); @@ -1124,47 +1123,31 @@ static int nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, struct iattr *sattr) { - struct inode * inode = dentry->d_inode; - int size_change = sattr->ia_valid & ATTR_SIZE; - struct nfs4_state *state = NULL; - int need_iput = 0; + struct rpc_cred *cred; + struct inode *inode = dentry->d_inode; + struct nfs4_state *state; int status; fattr->valid = 0; - if (size_change) { - struct rpc_cred *cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); - if (IS_ERR(cred)) - return PTR_ERR(cred); + cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); + if (IS_ERR(cred)) + return PTR_ERR(cred); + /* Search for an existing WRITE delegation first */ + state = nfs4_open_delegated(inode, FMODE_WRITE, cred); + if (!IS_ERR(state)) { + /* NB: nfs4_open_delegated() bumps the inode->i_count */ + iput(inode); + } else { + /* Search for an existing open(O_WRITE) stateid */ state = nfs4_find_state(inode, cred, FMODE_WRITE); - if (state == NULL) { - state = nfs4_open_delegated(dentry->d_inode, - FMODE_WRITE, cred); - if (IS_ERR(state)) - state = 
nfs4_do_open(dentry->d_parent->d_inode, - dentry, FMODE_WRITE, - NULL, cred); - need_iput = 1; - } - put_rpccred(cred); - if (IS_ERR(state)) - return PTR_ERR(state); - - if (state->inode != inode) { - printk(KERN_WARNING "nfs: raced in setattr (%p != %p), returning -EIO\n", inode, state->inode); - status = -EIO; - goto out; - } } + status = nfs4_do_setattr(NFS_SERVER(inode), fattr, NFS_FH(inode), sattr, state); -out: - if (state) { - inode = state->inode; + if (state != NULL) nfs4_close_state(state, FMODE_WRITE); - if (need_iput) - iput(inode); - } + put_rpccred(cred); return status; } -- cgit v1.2.3-59-g8ed1b From 951a143b3fcf15cfa9d38250b7462f821db241db Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Jun 2005 17:16:30 +0000 Subject: [PATCH] NFS: Fix the file size revalidation Instead of looking at whether or not the file is open for writes before we accept to update the length using the server value, we should rather be looking at whether or not we are currently caching any writes. Failure to do so means in particular that we're not updating the file length correctly after obtaining a POSIX or BSD lock. 
Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 2 +- fs/nfs/inode.c | 69 +++++++++++++------------------------------------- fs/nfs/write.c | 4 +-- include/linux/nfs_fs.h | 1 - 4 files changed, 21 insertions(+), 55 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 68df803f27ca..d6a30c844de3 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -517,7 +517,7 @@ retry: result = tot_bytes; out: - nfs_end_data_update_defer(inode); + nfs_end_data_update(inode); nfs_writedata_free(wdata); return result; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 6300e05e9463..b2d16758ced8 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1147,27 +1147,6 @@ void nfs_end_data_update(struct inode *inode) atomic_dec(&nfsi->data_updates); } -/** - * nfs_end_data_update_defer - * @inode - pointer to inode - * Declare end of the operations that will update file data - * This will defer marking the inode as needing revalidation - * unless there are no other pending updates. - */ -void nfs_end_data_update_defer(struct inode *inode) -{ - struct nfs_inode *nfsi = NFS_I(inode); - - if (atomic_dec_and_test(&nfsi->data_updates)) { - /* Mark the attribute cache for revalidation */ - nfsi->flags |= NFS_INO_INVALID_ATTR; - /* Directories and symlinks: invalidate page cache too */ - if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - nfsi->flags |= NFS_INO_INVALID_DATA; - nfsi->cache_change_attribute ++; - } -} - /** * nfs_refresh_inode - verify consistency of the inode attribute cache * @inode - pointer to inode @@ -1222,8 +1201,8 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) if (!timespec_equal(&inode->i_mtime, &fattr->mtime) || cur_size != new_isize) nfsi->flags |= NFS_INO_INVALID_ATTR; - } else if (S_ISREG(inode->i_mode) && new_isize > cur_size) - nfsi->flags |= NFS_INO_INVALID_ATTR; + } else if (new_isize != cur_size && nfsi->npages == 0) + nfsi->flags |= NFS_INO_INVALID_ATTR; /* Have any file permissions changed? 
*/ if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) @@ -1257,10 +1236,8 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsigned long verifier) { struct nfs_inode *nfsi = NFS_I(inode); - __u64 new_size; - loff_t new_isize; + loff_t cur_isize, new_isize; unsigned int invalid = 0; - loff_t cur_isize; int data_unstable; dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", @@ -1293,49 +1270,39 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign /* Are we racing with known updates of the metadata on the server? */ data_unstable = ! nfs_verify_change_attribute(inode, verifier); - /* Check if the file size agrees */ - new_size = fattr->size; + /* Check if our cached file size is stale */ new_isize = nfs_size_to_loff_t(fattr->size); cur_isize = i_size_read(inode); - if (cur_isize != new_size) { -#ifdef NFS_DEBUG_VERBOSE - printk(KERN_DEBUG "NFS: isize change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino); -#endif - /* - * If we have pending writebacks, things can get - * messy. - */ - if (S_ISREG(inode->i_mode) && data_unstable) { - if (new_isize > cur_isize) { + if (new_isize != cur_isize) { + /* Do we perhaps have any outstanding writes? */ + if (nfsi->npages == 0) { + /* No, but did we race with nfs_end_data_update()? */ + if (verifier == nfsi->cache_change_attribute) { inode->i_size = new_isize; - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + invalid |= NFS_INO_INVALID_DATA; } - } else { + invalid |= NFS_INO_INVALID_ATTR; + } else if (new_isize > cur_isize) { inode->i_size = new_isize; invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; } + dprintk("NFS: isize change on server for file %s/%ld\n", + inode->i_sb->s_id, inode->i_ino); } - /* - * Note: we don't check inode->i_mtime since pipes etc. - * can change this value in VFS without requiring a - * cache revalidation. 
- */ + /* Check if the mtime agrees */ if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); -#ifdef NFS_DEBUG_VERBOSE - printk(KERN_DEBUG "NFS: mtime change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino); -#endif + dprintk("NFS: mtime change on server for file %s/%ld\n", + inode->i_sb->s_id, inode->i_ino); if (!data_unstable) invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; } if ((fattr->valid & NFS_ATTR_FATTR_V4) && nfsi->change_attr != fattr->change_attr) { -#ifdef NFS_DEBUG_VERBOSE - printk(KERN_DEBUG "NFS: change_attr change on %s/%ld\n", + dprintk("NFS: change_attr change on server for file %s/%ld\n", inode->i_sb->s_id, inode->i_ino); -#endif nfsi->change_attr = fattr->change_attr; if (!data_unstable) invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 6f7a4af3bc46..c574d551f029 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -220,7 +220,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode, ClearPageError(page); io_error: - nfs_end_data_update_defer(inode); + nfs_end_data_update(inode); nfs_writedata_free(wdata); return written ? 
written : result; } @@ -401,7 +401,7 @@ static void nfs_inode_remove_request(struct nfs_page *req) nfsi->npages--; if (!nfsi->npages) { spin_unlock(&nfsi->req_lock); - nfs_end_data_update_defer(inode); + nfs_end_data_update(inode); iput(inode); } else spin_unlock(&nfsi->req_lock); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index c90313bfa435..211266c56ce5 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -294,7 +294,6 @@ extern void nfs_begin_attr_update(struct inode *); extern void nfs_end_attr_update(struct inode *); extern void nfs_begin_data_update(struct inode *); extern void nfs_end_data_update(struct inode *); -extern void nfs_end_data_update_defer(struct inode *); extern struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred); extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx); extern void put_nfs_open_context(struct nfs_open_context *ctx); -- cgit v1.2.3-59-g8ed1b From 7d52e86274e09fce8ac8f963e3605a84d0a305a7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Jun 2005 17:16:30 +0000 Subject: [PATCH] NFS: Cleanup of caching code, and slight optimization of writes. Unless we're doing O_APPEND writes, we really don't care about revalidating the file length. Just make sure that we catch any page cache invalidations. 
Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 12 +++++++++--- fs/nfs/inode.c | 44 +++++++++++++++++++++++++++++--------------- include/linux/nfs_fs.h | 1 + 3 files changed, 39 insertions(+), 18 deletions(-) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index a606708264ed..40436857ed42 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -333,9 +333,15 @@ nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t result = -EBUSY; if (IS_SWAPFILE(inode)) goto out_swapfile; - result = nfs_revalidate_inode(NFS_SERVER(inode), inode); - if (result) - goto out; + /* + * O_APPEND implies that we must revalidate the file length. + */ + if (iocb->ki_filp->f_flags & O_APPEND) { + result = nfs_revalidate_file_size(inode, iocb->ki_filp); + if (result) + goto out; + } else + nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); result = count; if (!count) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b2d16758ced8..a3922f4cc0a8 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1062,21 +1062,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) if (verifier == nfsi->cache_change_attribute) nfsi->flags &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); /* Do the page cache invalidation */ - if (flags & NFS_INO_INVALID_DATA) { - if (S_ISREG(inode->i_mode)) { - if (filemap_fdatawrite(inode->i_mapping) == 0) - filemap_fdatawait(inode->i_mapping); - nfs_wb_all(inode); - } - nfsi->flags &= ~NFS_INO_INVALID_DATA; - invalidate_inode_pages2(inode->i_mapping); - memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); - dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", - inode->i_sb->s_id, - (long long)NFS_FILEID(inode)); - /* This ensures we revalidate dentries */ - nfsi->cache_change_attribute++; - } + nfs_revalidate_mapping(inode, inode->i_mapping); if (flags & NFS_INO_INVALID_ACL) nfs_zap_acl_cache(inode); dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n", @@ -1115,6 +1101,34 @@ int 
nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) return __nfs_revalidate_inode(server, inode); } +/** + * nfs_revalidate_mapping - Revalidate the pagecache + * @inode - pointer to host inode + * @mapping - pointer to mapping + */ +void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + if (nfsi->flags & NFS_INO_INVALID_DATA) { + if (S_ISREG(inode->i_mode)) { + if (filemap_fdatawrite(mapping) == 0) + filemap_fdatawait(mapping); + nfs_wb_all(inode); + } + invalidate_inode_pages2(mapping); + nfsi->flags &= ~NFS_INO_INVALID_DATA; + if (S_ISDIR(inode->i_mode)) { + memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); + /* This ensures we revalidate child dentries */ + nfsi->cache_change_attribute++; + } + dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", + inode->i_sb->s_id, + (long long)NFS_FILEID(inode)); + } +} + /** * nfs_begin_data_update * @inode - pointer to inode diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 211266c56ce5..443103c13e53 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -289,6 +289,7 @@ extern int nfs_release(struct inode *, struct file *); extern int nfs_attribute_timeout(struct inode *inode); extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode); extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *); +extern void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping); extern int nfs_setattr(struct dentry *, struct iattr *); extern void nfs_begin_attr_update(struct inode *); extern void nfs_end_attr_update(struct inode *); -- cgit v1.2.3-59-g8ed1b From fe51beecc55d0b0dce289e4758e7c529a642f63e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Jun 2005 17:16:30 +0000 Subject: [PATCH] NFS: Ensure that fstat() always returns the correct mtime Even if the file is open for writes. 
Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 28 ++++++++++++++++++++++------ fs/nfs/inode.c | 24 ++++++++++++++++-------- include/linux/nfs_fs.h | 1 + 3 files changed, 39 insertions(+), 14 deletions(-) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 40436857ed42..5621ba9885f4 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -127,6 +127,21 @@ nfs_file_release(struct inode *inode, struct file *filp) return NFS_PROTO(inode)->file_release(inode, filp); } +/** + * nfs_revalidate_file - Revalidate the page cache & related metadata + * @inode - pointer to inode struct + * @file - pointer to file + */ +static int nfs_revalidate_file(struct inode *inode, struct file *filp) +{ + int retval = 0; + + if ((NFS_FLAGS(inode) & NFS_INO_REVAL_PAGECACHE) || nfs_attribute_timeout(inode)) + retval = __nfs_revalidate_inode(NFS_SERVER(inode), inode); + nfs_revalidate_mapping(inode, filp->f_mapping); + return 0; +} + /** * nfs_revalidate_size - Revalidate the file size * @inode - pointer to inode struct @@ -149,7 +164,8 @@ static int nfs_revalidate_file_size(struct inode *inode, struct file *filp) goto force_reval; if (nfsi->npages != 0) return 0; - return nfs_revalidate_inode(server, inode); + if (!(NFS_FLAGS(inode) & NFS_INO_REVAL_PAGECACHE) && !nfs_attribute_timeout(inode)) + return 0; force_reval: return __nfs_revalidate_inode(server, inode); } @@ -210,7 +226,7 @@ nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos) dentry->d_parent->d_name.name, dentry->d_name.name, (unsigned long) count, (unsigned long) pos); - result = nfs_revalidate_inode(NFS_SERVER(inode), inode); + result = nfs_revalidate_file(inode, iocb->ki_filp); if (!result) result = generic_file_aio_read(iocb, buf, count, pos); return result; @@ -228,7 +244,7 @@ nfs_file_sendfile(struct file *filp, loff_t *ppos, size_t count, dentry->d_parent->d_name.name, dentry->d_name.name, (unsigned long) count, (unsigned long long) *ppos); - res = nfs_revalidate_inode(NFS_SERVER(inode), inode); + res 
= nfs_revalidate_file(inode, filp); if (!res) res = generic_file_sendfile(filp, ppos, count, actor, target); return res; @@ -244,7 +260,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) dfprintk(VFS, "nfs: mmap(%s/%s)\n", dentry->d_parent->d_name.name, dentry->d_name.name); - status = nfs_revalidate_inode(NFS_SERVER(inode), inode); + status = nfs_revalidate_file(inode, file); if (!status) status = generic_file_mmap(file, vma); return status; @@ -340,8 +356,8 @@ nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t result = nfs_revalidate_file_size(inode, iocb->ki_filp); if (result) goto out; - } else - nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); + } + nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); result = count; if (!count) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index a3922f4cc0a8..4f545f382ba6 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -620,9 +620,9 @@ nfs_zap_caches(struct inode *inode) memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) - nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; else - nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; } static void nfs_zap_acl_cache(struct inode *inode) @@ -1055,6 +1055,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) goto out; } flags = nfsi->flags; + nfsi->flags &= ~NFS_INO_REVAL_PAGECACHE; /* * We may need to keep the attributes marked as invalid if * we raced with nfs_end_attr_update(). 
@@ -1187,8 +1188,11 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) if ((fattr->valid & NFS_ATTR_PRE_CHANGE) != 0 && nfsi->change_attr == fattr->pre_change_attr) nfsi->change_attr = fattr->change_attr; - if (!data_unstable && nfsi->change_attr != fattr->change_attr) + if (nfsi->change_attr != fattr->change_attr) { nfsi->flags |= NFS_INO_INVALID_ATTR; + if (!data_unstable) + nfsi->flags |= NFS_INO_REVAL_PAGECACHE; + } } if ((fattr->valid & NFS_ATTR_FATTR) == 0) @@ -1211,12 +1215,16 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) } /* Verify a few of the more important attributes */ - if (!data_unstable) { - if (!timespec_equal(&inode->i_mtime, &fattr->mtime) - || cur_size != new_isize) - nfsi->flags |= NFS_INO_INVALID_ATTR; - } else if (new_isize != cur_size && nfsi->npages == 0) + if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { nfsi->flags |= NFS_INO_INVALID_ATTR; + if (!data_unstable) + nfsi->flags |= NFS_INO_REVAL_PAGECACHE; + } + if (cur_size != new_isize) { + nfsi->flags |= NFS_INO_INVALID_ATTR; + if (nfsi->npages == 0) + nfsi->flags |= NFS_INO_REVAL_PAGECACHE; + } /* Have any file permissions changed? 
*/ if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 443103c13e53..2954e44ed498 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -198,6 +198,7 @@ struct nfs_inode { #define NFS_INO_INVALID_ATIME 0x0020 /* cached atime is invalid */ #define NFS_INO_INVALID_ACCESS 0x0040 /* cached access cred invalid */ #define NFS_INO_INVALID_ACL 0x0080 /* cached acls are invalid */ +#define NFS_INO_REVAL_PAGECACHE 0x1000 /* must revalidate pagecache */ static inline struct nfs_inode *NFS_I(struct inode *inode) { -- cgit v1.2.3-59-g8ed1b From ab0a3dbedc51037f3d2e22ef67717a987b3d15e2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Jun 2005 17:16:30 +0000 Subject: [PATCH] NFS: Write optimization for short files and small O_SYNC writes. Use stable writes if we can see that we are only going to put a single write on the wire. Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index c574d551f029..79b621a545b2 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -750,7 +750,7 @@ int nfs_updatepage(struct file *file, struct page *page, * is entirely in cache, it may be more efficient to avoid * fragmenting write requests. 
*/ - if (PageUptodate(page) && inode->i_flock == NULL) { + if (PageUptodate(page) && inode->i_flock == NULL && !(file->f_mode & O_SYNC)) { loff_t end_offs = i_size_read(inode) - 1; unsigned long end_index = end_offs >> PAGE_CACHE_SHIFT; @@ -1342,8 +1342,16 @@ static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, spin_lock(&nfsi->req_lock); res = nfs_scan_dirty(inode, &head, idx_start, npages); spin_unlock(&nfsi->req_lock); - if (res) - error = nfs_flush_list(&head, NFS_SERVER(inode)->wpages, how); + if (res) { + struct nfs_server *server = NFS_SERVER(inode); + + /* For single writes, FLUSH_STABLE is more efficient */ + if (res == nfsi->npages && nfsi->npages <= server->wpages) { + if (res > 1 || nfs_list_entry(head.next)->wb_bytes <= server->wsize) + how |= FLUSH_STABLE; + } + error = nfs_flush_list(&head, server->wpages, how); + } if (error < 0) return error; return res; -- cgit v1.2.3-59-g8ed1b From c6a556b88adfacd2af90be84357c8165d716c27d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Jun 2005 17:16:30 +0000 Subject: [PATCH] NFS: Make searching and waiting on busy writeback requests more efficient. Basically copies the VFS's method for tracking writebacks and applies it to the struct nfs_page. 
Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 29 ++++++++++++++++++++++++++++- fs/nfs/read.c | 3 --- fs/nfs/write.c | 19 +++++++++---------- include/linux/nfs_page.h | 12 ++++++++---- 4 files changed, 45 insertions(+), 18 deletions(-) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 80777f99a58a..356a33bb38a6 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -111,6 +111,33 @@ void nfs_unlock_request(struct nfs_page *req) nfs_release_request(req); } +/** + * nfs_set_page_writeback_locked - Lock a request for writeback + * @req: + */ +int nfs_set_page_writeback_locked(struct nfs_page *req) +{ + struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); + + if (!nfs_lock_request(req)) + return 0; + radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK); + return 1; +} + +/** + * nfs_clear_page_writeback - Unlock request and wake up sleepers + */ +void nfs_clear_page_writeback(struct nfs_page *req) +{ + struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); + + spin_lock(&nfsi->req_lock); + radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK); + spin_unlock(&nfsi->req_lock); + nfs_unlock_request(req); +} + /** * nfs_clear_request - Free up all resources allocated to the request * @req: @@ -301,7 +328,7 @@ nfs_scan_list(struct list_head *head, struct list_head *dst, if (req->wb_index > idx_end) break; - if (!nfs_lock_request(req)) + if (!nfs_set_page_writeback_locked(req)) continue; nfs_list_remove_request(req); nfs_list_add_request(req, dst); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index a0042fb58634..6f866b8aa2d5 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -173,7 +173,6 @@ static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, if (len < PAGE_CACHE_SIZE) memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len); - nfs_lock_request(new); nfs_list_add_request(new, &one_request); nfs_pagein_one(&one_request, inode); return 0; @@ -185,7 +184,6 
@@ static void nfs_readpage_release(struct nfs_page *req) nfs_clear_request(req); nfs_release_request(req); - nfs_unlock_request(req); dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", req->wb_context->dentry->d_inode->i_sb->s_id, @@ -553,7 +551,6 @@ readpage_async_filler(void *data, struct page *page) } if (len < PAGE_CACHE_SIZE) memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len); - nfs_lock_request(new); nfs_list_add_request(new, desc->head); return 0; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 79b621a545b2..58a39b0486a7 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -503,13 +503,12 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int spin_lock(&nfsi->req_lock); next = idx_start; - while (radix_tree_gang_lookup(&nfsi->nfs_page_tree, (void **)&req, next, 1)) { + while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) { if (req->wb_index > idx_end) break; next = req->wb_index + 1; - if (!NFS_WBACK_BUSY(req)) - continue; + BUG_ON(!NFS_WBACK_BUSY(req)); atomic_inc(&req->wb_count); spin_unlock(&nfsi->req_lock); @@ -821,7 +820,7 @@ out: #else nfs_inode_remove_request(req); #endif - nfs_unlock_request(req); + nfs_clear_page_writeback(req); } static inline int flush_task_priority(int how) @@ -952,7 +951,7 @@ out_bad: nfs_writedata_free(data); } nfs_mark_request_dirty(req); - nfs_unlock_request(req); + nfs_clear_page_writeback(req); return -ENOMEM; } @@ -1002,7 +1001,7 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how) struct nfs_page *req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_mark_request_dirty(req); - nfs_unlock_request(req); + nfs_clear_page_writeback(req); } return -ENOMEM; } @@ -1029,7 +1028,7 @@ nfs_flush_list(struct list_head *head, int wpages, int how) req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_mark_request_dirty(req); - nfs_unlock_request(req); + nfs_clear_page_writeback(req); } return 
error; } @@ -1121,7 +1120,7 @@ static void nfs_writeback_done_full(struct nfs_write_data *data, int status) nfs_inode_remove_request(req); #endif next: - nfs_unlock_request(req); + nfs_clear_page_writeback(req); } } @@ -1278,7 +1277,7 @@ nfs_commit_list(struct list_head *head, int how) req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_mark_request_commit(req); - nfs_unlock_request(req); + nfs_clear_page_writeback(req); } return -ENOMEM; } @@ -1324,7 +1323,7 @@ nfs_commit_done(struct rpc_task *task) dprintk(" mismatch\n"); nfs_mark_request_dirty(req); next: - nfs_unlock_request(req); + nfs_clear_page_writeback(req); res++; } sub_page_state(nr_unstable,res); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 39e4895bcdb4..db40e4590ba2 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -19,6 +19,11 @@ #include +/* + * Valid flags for the radix tree + */ +#define NFS_PAGE_TAG_WRITEBACK 1 + /* * Valid flags for a dirty buffer */ @@ -62,6 +67,9 @@ extern int nfs_coalesce_requests(struct list_head *, struct list_head *, unsigned int); extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); +extern int nfs_set_page_writeback_locked(struct nfs_page *req); +extern void nfs_clear_page_writeback(struct nfs_page *req); + /* * Lock the page of an asynchronous request without incrementing the wb_count @@ -96,10 +104,6 @@ nfs_list_remove_request(struct nfs_page *req) { if (list_empty(&req->wb_list)) return; - if (!NFS_WBACK_BUSY(req)) { - printk(KERN_ERR "NFS: unlocked request attempted removed from list!\n"); - BUG(); - } list_del_init(&req->wb_list); req->wb_list_head = NULL; } -- cgit v1.2.3-59-g8ed1b From 3da28eb1c6545fe73263a24eba0996217490e1eb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Jun 2005 17:16:31 +0000 Subject: [PATCH] NFS: Replace nfs_page insertion sort with a radix sort Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 2 +- 
fs/nfs/pagelist.c | 86 +++++++++++++++++++++++++++++++----------------- fs/nfs/write.c | 71 ++++++++++++++++++--------------------- include/linux/nfs_fs.h | 4 +-- include/linux/nfs_page.h | 18 ++++++++-- 5 files changed, 107 insertions(+), 74 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 4f545f382ba6..4845911f1c63 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -135,7 +135,7 @@ nfs_write_inode(struct inode *inode, int sync) int flags = sync ? FLUSH_WAIT : 0; int ret; - ret = nfs_commit_inode(inode, 0, 0, flags); + ret = nfs_commit_inode(inode, flags); if (ret < 0) return ret; return 0; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 356a33bb38a6..d53857b148e2 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -177,36 +177,6 @@ nfs_release_request(struct nfs_page *req) nfs_page_free(req); } -/** - * nfs_list_add_request - Insert a request into a sorted list - * @req: request - * @head: head of list into which to insert the request. - * - * Note that the wb_list is sorted by page index in order to facilitate - * coalescing of requests. - * We use an insertion sort that is optimized for the case of appended - * writes. 
- */ -void -nfs_list_add_request(struct nfs_page *req, struct list_head *head) -{ - struct list_head *pos; - -#ifdef NFS_PARANOIA - if (!list_empty(&req->wb_list)) { - printk(KERN_ERR "NFS: Add to list failed!\n"); - BUG(); - } -#endif - list_for_each_prev(pos, head) { - struct nfs_page *p = nfs_list_entry(pos); - if (p->wb_index < req->wb_index) - break; - } - list_add(&req->wb_list, pos); - req->wb_list_head = head; -} - static int nfs_wait_bit_interruptible(void *word) { int ret = 0; @@ -291,6 +261,62 @@ nfs_coalesce_requests(struct list_head *head, struct list_head *dst, return npages; } +#define NFS_SCAN_MAXENTRIES 16 +/** + * nfs_scan_lock_dirty - Scan the radix tree for dirty requests + * @nfsi: NFS inode + * @dst: Destination list + * @idx_start: lower bound of page->index to scan + * @npages: idx_start + npages sets the upper bound to scan. + * + * Moves elements from one of the inode request lists. + * If the number of requests is set to 0, the entire address_space + * starting at index idx_start, is scanned. + * The requests are *not* checked to ensure that they form a contiguous set. 
+ * You must be holding the inode's req_lock when calling this function + */ +int +nfs_scan_lock_dirty(struct nfs_inode *nfsi, struct list_head *dst, + unsigned long idx_start, unsigned int npages) +{ + struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES]; + struct nfs_page *req; + unsigned long idx_end; + int found, i; + int res; + + res = 0; + if (npages == 0) + idx_end = ~0; + else + idx_end = idx_start + npages - 1; + + for (;;) { + found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, + (void **)&pgvec[0], idx_start, NFS_SCAN_MAXENTRIES, + NFS_PAGE_TAG_DIRTY); + if (found <= 0) + break; + for (i = 0; i < found; i++) { + req = pgvec[i]; + if (req->wb_index > idx_end) + goto out; + + idx_start = req->wb_index + 1; + + if (nfs_set_page_writeback_locked(req)) { + radix_tree_tag_clear(&nfsi->nfs_page_tree, + req->wb_index, NFS_PAGE_TAG_DIRTY); + nfs_list_remove_request(req); + nfs_list_add_request(req, dst); + res++; + } + } + } +out: + return res; +} + /** * nfs_scan_list - Scan a list for matching requests * @head: One of the NFS inode request lists diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 58a39b0486a7..5130eda231d7 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -352,7 +352,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) if (err < 0) goto out; } - err = nfs_commit_inode(inode, 0, 0, wb_priority(wbc)); + err = nfs_commit_inode(inode, wb_priority(wbc)); if (err > 0) { wbc->nr_to_write -= err; err = 0; @@ -446,6 +446,8 @@ nfs_mark_request_dirty(struct nfs_page *req) struct nfs_inode *nfsi = NFS_I(inode); spin_lock(&nfsi->req_lock); + radix_tree_tag_set(&nfsi->nfs_page_tree, + req->wb_index, NFS_PAGE_TAG_DIRTY); nfs_list_add_request(req, &nfsi->dirty); nfsi->ndirty++; spin_unlock(&nfsi->req_lock); @@ -537,12 +539,15 @@ static int nfs_scan_dirty(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages) { struct nfs_inode *nfsi = NFS_I(inode); - int res; - res = nfs_scan_list(&nfsi->dirty, 
dst, idx_start, npages); - nfsi->ndirty -= res; - sub_page_state(nr_dirty,res); - if ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty)) - printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n"); + int res = 0; + + if (nfsi->ndirty != 0) { + res = nfs_scan_lock_dirty(nfsi, dst, idx_start, npages); + nfsi->ndirty -= res; + sub_page_state(nr_dirty,res); + if ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty)) + printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n"); + } return res; } @@ -561,11 +566,14 @@ static int nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages) { struct nfs_inode *nfsi = NFS_I(inode); - int res; - res = nfs_scan_list(&nfsi->commit, dst, idx_start, npages); - nfsi->ncommit -= res; - if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit)) - printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n"); + int res = 0; + + if (nfsi->ncommit != 0) { + res = nfs_scan_list(&nfsi->commit, dst, idx_start, npages); + nfsi->ncommit -= res; + if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit)) + printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n"); + } return res; } #endif @@ -1209,36 +1217,24 @@ static void nfs_commit_rpcsetup(struct list_head *head, struct nfs_write_data *data, int how) { struct rpc_task *task = &data->task; - struct nfs_page *first, *last; + struct nfs_page *first; struct inode *inode; - loff_t start, end, len; /* Set up the RPC argument and reply structs * NB: take care not to mess about with data->commit et al. */ list_splice_init(head, &data->pages); first = nfs_list_entry(data->pages.next); - last = nfs_list_entry(data->pages.prev); inode = first->wb_context->dentry->d_inode; - /* - * Determine the offset range of requests in the COMMIT call. - * We rely on the fact that data->pages is an ordered list... 
- */ - start = req_offset(first); - end = req_offset(last) + last->wb_bytes; - len = end - start; - /* If 'len' is not a 32-bit quantity, pass '0' in the COMMIT call */ - if (end >= i_size_read(inode) || len < 0 || len > (~((u32)0) >> 1)) - len = 0; - data->inode = inode; data->cred = first->wb_context->cred; data->args.fh = NFS_FH(data->inode); - data->args.offset = start; - data->args.count = len; - data->res.count = len; + /* Note: we always request a commit of the entire inode */ + data->args.offset = 0; + data->args.count = 0; + data->res.count = 0; data->res.fattr = &data->fattr; data->res.verf = &data->verf; @@ -1357,8 +1353,7 @@ static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, } #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -int nfs_commit_inode(struct inode *inode, unsigned long idx_start, - unsigned int npages, int how) +int nfs_commit_inode(struct inode *inode, int how) { struct nfs_inode *nfsi = NFS_I(inode); LIST_HEAD(head); @@ -1366,15 +1361,13 @@ int nfs_commit_inode(struct inode *inode, unsigned long idx_start, error = 0; spin_lock(&nfsi->req_lock); - res = nfs_scan_commit(inode, &head, idx_start, npages); + res = nfs_scan_commit(inode, &head, 0, 0); + spin_unlock(&nfsi->req_lock); if (res) { - res += nfs_scan_commit(inode, &head, 0, 0); - spin_unlock(&nfsi->req_lock); error = nfs_commit_list(&head, how); - } else - spin_unlock(&nfsi->req_lock); - if (error < 0) - return error; + if (error < 0) + return error; + } return res; } #endif @@ -1396,7 +1389,7 @@ int nfs_sync_inode(struct inode *inode, unsigned long idx_start, error = nfs_flush_inode(inode, idx_start, npages, how); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) if (error == 0) - error = nfs_commit_inode(inode, idx_start, npages, how); + error = nfs_commit_inode(inode, how); #endif } while (error > 0); return error; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 2954e44ed498..8ea249110fb0 100644 --- a/include/linux/nfs_fs.h +++ 
b/include/linux/nfs_fs.h @@ -395,10 +395,10 @@ extern void nfs_commit_done(struct rpc_task *); */ extern int nfs_sync_inode(struct inode *, unsigned long, unsigned int, int); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -extern int nfs_commit_inode(struct inode *, unsigned long, unsigned int, int); +extern int nfs_commit_inode(struct inode *, int); #else static inline int -nfs_commit_inode(struct inode *inode, unsigned long idx_start, unsigned int npages, int how) +nfs_commit_inode(struct inode *inode, int how) { return 0; } diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index db40e4590ba2..da2e077b65e2 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -22,6 +22,7 @@ /* * Valid flags for the radix tree */ +#define NFS_PAGE_TAG_DIRTY 0 #define NFS_PAGE_TAG_WRITEBACK 1 /* @@ -31,6 +32,7 @@ #define PG_NEED_COMMIT 1 #define PG_NEED_RESCHED 2 +struct nfs_inode; struct nfs_page { struct list_head wb_list, /* Defines state of page: */ *wb_list_head; /* read/write/commit */ @@ -59,8 +61,8 @@ extern void nfs_clear_request(struct nfs_page *req); extern void nfs_release_request(struct nfs_page *req); -extern void nfs_list_add_request(struct nfs_page *, struct list_head *); - +extern int nfs_scan_lock_dirty(struct nfs_inode *nfsi, struct list_head *dst, + unsigned long idx_start, unsigned int npages); extern int nfs_scan_list(struct list_head *, struct list_head *, unsigned long, unsigned int); extern int nfs_coalesce_requests(struct list_head *, struct list_head *, @@ -94,6 +96,18 @@ nfs_lock_request(struct nfs_page *req) return 1; } +/** + * nfs_list_add_request - Insert a request into a list + * @req: request + * @head: head of list into which to insert the request. 
+ */ +static inline void +nfs_list_add_request(struct nfs_page *req, struct list_head *head) +{ + list_add_tail(&req->wb_list, head); + req->wb_list_head = head; +} + /** * nfs_list_remove_request - Remove a request from its wb_list -- cgit v1.2.3-59-g8ed1b From 80fec4c62e2cf544ac26e53f3e0d2f73df6820b9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Jun 2005 17:16:31 +0000 Subject: [PATCH] VFS: Ensure that all the on-stack struct file_lock call fl_release_private Signed-off-by: Trond Myklebust --- fs/locks.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/locks.c b/fs/locks.c index 3fa6a7ce57a7..a0bc03495bd4 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1548,6 +1548,8 @@ int fcntl_getlk(struct file *filp, struct flock __user *l) if (filp->f_op && filp->f_op->lock) { error = filp->f_op->lock(filp, F_GETLK, &file_lock); + if (file_lock.fl_ops && file_lock.fl_ops->fl_release_private) + file_lock.fl_ops->fl_release_private(&file_lock); if (error < 0) goto out; else @@ -1690,6 +1692,8 @@ int fcntl_getlk64(struct file *filp, struct flock64 __user *l) if (filp->f_op && filp->f_op->lock) { error = filp->f_op->lock(filp, F_GETLK, &file_lock); + if (file_lock.fl_ops && file_lock.fl_ops->fl_release_private) + file_lock.fl_ops->fl_release_private(&file_lock); if (error < 0) goto out; else @@ -1873,6 +1877,8 @@ void locks_remove_flock(struct file *filp) .fl_end = OFFSET_MAX, }; filp->f_op->flock(filp, F_SETLKW, &fl); + if (fl.fl_ops && fl.fl_ops->fl_release_private) + fl.fl_ops->fl_release_private(&fl); } lock_kernel(); -- cgit v1.2.3-59-g8ed1b From 4f15e2b1f4f3a56e46201714b39436c32218d547 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Jun 2005 17:16:31 +0000 Subject: [PATCH] NLM: cleanup for blocked locks. 
Signed-off-by: Trond Myklebust --- fs/lockd/clntlock.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index ef7103b8c5bd..44adb84183b6 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -31,7 +31,7 @@ static int reclaimer(void *ptr); * This is the representation of a blocked client lock. */ struct nlm_wait { - struct nlm_wait * b_next; /* linked list */ + struct list_head b_list; /* linked list */ wait_queue_head_t b_wait; /* where to wait on */ struct nlm_host * b_host; struct file_lock * b_lock; /* local file lock */ @@ -39,7 +39,7 @@ struct nlm_wait { u32 b_status; /* grant callback status */ }; -static struct nlm_wait * nlm_blocked; +static LIST_HEAD(nlm_blocked); /* * Block on a lock @@ -55,8 +55,7 @@ nlmclnt_block(struct nlm_host *host, struct file_lock *fl, u32 *statp) block.b_lock = fl; init_waitqueue_head(&block.b_wait); block.b_status = NLM_LCK_BLOCKED; - block.b_next = nlm_blocked; - nlm_blocked = &block; + list_add(&block.b_list, &nlm_blocked); /* Remember pseudo nsm state */ pstate = host->h_state; @@ -71,12 +70,7 @@ nlmclnt_block(struct nlm_host *host, struct file_lock *fl, u32 *statp) */ sleep_on_timeout(&block.b_wait, 30*HZ); - for (head = &nlm_blocked; *head; head = &(*head)->b_next) { - if (*head == &block) { - *head = block.b_next; - break; - } - } + list_del(&block.b_list); if (!signalled()) { *statp = block.b_status; @@ -105,7 +99,7 @@ nlmclnt_grant(struct nlm_lock *lock) * Look up blocked request based on arguments. * Warning: must not use cookie to match it!
*/ - for (block = nlm_blocked; block; block = block->b_next) { + list_for_each_entry(block, &nlm_blocked, b_list) { if (nlm_compare_locks(block->b_lock, &lock->fl)) break; } @@ -230,7 +224,7 @@ restart: host->h_reclaiming = 0; /* Now, wake up all processes that sleep on a blocked lock */ - for (block = nlm_blocked; block; block = block->b_next) { + list_for_each_entry(block, &nlm_blocked, b_list) { if (block->b_host == host) { block->b_status = NLM_LCK_DENIED_GRACE_PERIOD; wake_up(&block->b_wait); -- cgit v1.2.3-59-g8ed1b From ecdbf769b2cb8903e07cd482334c714d89fd1146 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Jun 2005 17:16:31 +0000 Subject: [PATCH] NLM: fix a client-side race on blocking locks. If the lock blocks, the server may send us a GRANTED message that races with the reply to our LOCK request. Make sure that we catch the GRANTED by queueing up our request on the nlm_blocked list before we send off the first LOCK rpc call. Signed-off-by: Trond Myklebust --- fs/lockd/clntlock.c | 99 ++++++++++++++++++++++++++------------------- fs/lockd/clntproc.c | 40 ++++++++++++++---- include/linux/lockd/lockd.h | 7 +++- 3 files changed, 96 insertions(+), 50 deletions(-) diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 44adb84183b6..006bb9e14579 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -42,23 +42,51 @@ struct nlm_wait { static LIST_HEAD(nlm_blocked); /* - * Block on a lock + * Queue up a lock for blocking so that the GRANTED request can see it */ -int -nlmclnt_block(struct nlm_host *host, struct file_lock *fl, u32 *statp) +int nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl) +{ + struct nlm_wait *block; + + BUG_ON(req->a_block != NULL); + block = kmalloc(sizeof(*block), GFP_KERNEL); + if (block == NULL) + return -ENOMEM; + block->b_host = host; + block->b_lock = fl; + init_waitqueue_head(&block->b_wait); + block->b_status = NLM_LCK_BLOCKED; + + list_add(&block->b_list, &nlm_blocked); 
+ req->a_block = block; + + return 0; +} + +void nlmclnt_finish_block(struct nlm_rqst *req) { - struct nlm_wait block, **head; - int err; - u32 pstate; + struct nlm_wait *block = req->a_block; - block.b_host = host; - block.b_lock = fl; - init_waitqueue_head(&block.b_wait); - block.b_status = NLM_LCK_BLOCKED; - list_add(&block.b_list, &nlm_blocked); + if (block == NULL) + return; + req->a_block = NULL; + list_del(&block->b_list); + kfree(block); +} - /* Remember pseudo nsm state */ - pstate = host->h_state; +/* + * Block on a lock + */ +long nlmclnt_block(struct nlm_rqst *req, long timeout) +{ + struct nlm_wait *block = req->a_block; + long ret; + + /* A borken server might ask us to block even if we didn't + * request it. Just say no! + */ + if (!req->a_args.block) + return -EAGAIN; /* Go to sleep waiting for GRANT callback. Some servers seem * to lose callbacks, however, so we're going to poll from @@ -68,23 +96,16 @@ nlmclnt_block(struct nlm_host *host, struct file_lock *fl, u32 *statp) * a 1 minute timeout would do. See the comment before * nlmclnt_lock for an explanation. */ - sleep_on_timeout(&block.b_wait, 30*HZ); + ret = wait_event_interruptible_timeout(block->b_wait, + block->b_status != NLM_LCK_BLOCKED, + timeout); - list_del(&block.b_list); - - if (!signalled()) { - *statp = block.b_status; - return 0; + if (block->b_status != NLM_LCK_BLOCKED) { + req->a_res.status = block->b_status; + block->b_status = NLM_LCK_BLOCKED; } - /* Okay, we were interrupted. Cancel the pending request - * unless the server has rebooted. - */ - if (pstate == host->h_state && (err = nlmclnt_cancel(host, fl)) < 0) - printk(KERN_NOTICE - "lockd: CANCEL call failed (errno %d)\n", -err); - - return -ERESTARTSYS; + return ret; } /* @@ -94,27 +115,23 @@ u32 nlmclnt_grant(struct nlm_lock *lock) { struct nlm_wait *block; + u32 res = nlm_lck_denied; /* * Look up blocked request based on arguments. * Warning: must not use cookie to match it! 
*/ list_for_each_entry(block, &nlm_blocked, b_list) { - if (nlm_compare_locks(block->b_lock, &lock->fl)) - break; + if (nlm_compare_locks(block->b_lock, &lock->fl)) { + /* Alright, we found a lock. Set the return status + * and wake up the caller + */ + block->b_status = NLM_LCK_GRANTED; + wake_up(&block->b_wait); + res = nlm_granted; + } } - - /* Ooops, no blocked request found. */ - if (block == NULL) - return nlm_lck_denied; - - /* Alright, we found the lock. Set the return status and - * wake up the caller. - */ - block->b_status = NLM_LCK_GRANTED; - wake_up(&block->b_wait); - - return nlm_granted; + return res; } /* diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index a4407619b1f1..fd77ed1d710d 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -21,6 +21,7 @@ #define NLMDBG_FACILITY NLMDBG_CLIENT #define NLMCLNT_GRACE_WAIT (5*HZ) +#define NLMCLNT_POLL_TIMEOUT (30*HZ) static int nlmclnt_test(struct nlm_rqst *, struct file_lock *); static int nlmclnt_lock(struct nlm_rqst *, struct file_lock *); @@ -553,7 +554,8 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) { struct nlm_host *host = req->a_host; struct nlm_res *resp = &req->a_res; - int status; + long timeout; + int status; if (!host->h_monitored && nsm_monitor(host) < 0) { printk(KERN_NOTICE "lockd: failed to monitor %s\n", @@ -562,15 +564,32 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) goto out; } - do { - if ((status = nlmclnt_call(req, NLMPROC_LOCK)) >= 0) { - if (resp->status != NLM_LCK_BLOCKED) - break; - status = nlmclnt_block(host, fl, &resp->status); - } + if (req->a_args.block) { + status = nlmclnt_prepare_block(req, host, fl); if (status < 0) goto out; - } while (resp->status == NLM_LCK_BLOCKED && req->a_args.block); + } + for(;;) { + status = nlmclnt_call(req, NLMPROC_LOCK); + if (status < 0) + goto out_unblock; + if (resp->status != NLM_LCK_BLOCKED) + break; + /* Wait on an NLM blocking lock */ + timeout = nlmclnt_block(req, NLMCLNT_POLL_TIMEOUT); + /* 
Did a reclaimer thread notify us of a server reboot? */ + if (resp->status == NLM_LCK_DENIED_GRACE_PERIOD) + continue; + if (resp->status != NLM_LCK_BLOCKED) + break; + if (timeout >= 0) + continue; + /* We were interrupted. Send a CANCEL request to the server + * and exit + */ + status = (int)timeout; + goto out_unblock; + } if (resp->status == NLM_LCK_GRANTED) { fl->fl_u.nfs_fl.state = host->h_state; @@ -579,6 +598,11 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) do_vfs_lock(fl); } status = nlm_stat_to_errno(resp->status); +out_unblock: + nlmclnt_finish_block(req); + /* Cancel the blocked request if it is still pending */ + if (resp->status == NLM_LCK_BLOCKED) + nlmclnt_cancel(host, fl); out: nlmclnt_release_lockargs(req); return status; diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 0d9d22578212..16d4e5a08e1d 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -72,6 +72,8 @@ struct nlm_lockowner { uint32_t pid; }; +struct nlm_wait; + /* * Memory chunk for NLM client RPC request. 
*/ @@ -81,6 +83,7 @@ struct nlm_rqst { struct nlm_host * a_host; /* host handle */ struct nlm_args a_args; /* arguments */ struct nlm_res a_res; /* result */ + struct nlm_wait * a_block; char a_owner[NLMCLNT_OHSIZE]; }; @@ -142,7 +145,9 @@ extern unsigned long nlmsvc_timeout; * Lockd client functions */ struct nlm_rqst * nlmclnt_alloc_call(void); -int nlmclnt_block(struct nlm_host *, struct file_lock *, u32 *); +int nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl); +void nlmclnt_finish_block(struct nlm_rqst *req); +long nlmclnt_block(struct nlm_rqst *req, long timeout); int nlmclnt_cancel(struct nlm_host *, struct file_lock *); u32 nlmclnt_grant(struct nlm_lock *); void nlmclnt_recovery(struct nlm_host *, u32); -- cgit v1.2.3-59-g8ed1b From 8d0a8a9d0ec790086c64d210af413ac351d89e35 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Jun 2005 17:16:32 +0000 Subject: [PATCH] NFSv4: Clean up nfs4 lock state accounting Ensure that lock owner structures are not released prematurely. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 9 +-- fs/nfs/nfs4proc.c | 69 ++++++++---------- fs/nfs/nfs4state.c | 178 +++++++++++++++++++++-------------------------- include/linux/fs.h | 1 + include/linux/nfs_fs_i.h | 5 ++ 5 files changed, 118 insertions(+), 144 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 7c6f1d668fbd..ec1a22d7b876 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -128,6 +128,7 @@ struct nfs4_state_owner { struct nfs4_lock_state { struct list_head ls_locks; /* Other lock stateids */ + struct nfs4_state * ls_state; /* Pointer to open state */ fl_owner_t ls_owner; /* POSIX lock owner */ #define NFS_LOCK_INITIALIZED 1 int ls_flags; @@ -153,7 +154,7 @@ struct nfs4_state { unsigned long flags; /* Do we hold any locks? 
*/ struct semaphore lock_sema; /* Serializes file locking operations */ - rwlock_t state_lock; /* Protects the lock_states list */ + spinlock_t state_lock; /* Protects the lock_states list */ nfs4_stateid stateid; @@ -225,12 +226,8 @@ extern void nfs4_close_state(struct nfs4_state *, mode_t); extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode); extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp); extern void nfs4_schedule_state_recovery(struct nfs4_client *); -extern struct nfs4_lock_state *nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t); -extern struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t); -extern void nfs4_put_lock_state(struct nfs4_lock_state *state); +extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); extern void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *ls); -extern void nfs4_notify_setlk(struct nfs4_state *, struct file_lock *, struct nfs4_lock_state *); -extern void nfs4_notify_unlck(struct nfs4_state *, struct file_lock *, struct nfs4_lock_state *); extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); extern const nfs4_stateid zero_stateid; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index af80b5981486..0ddc20102d46 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2626,14 +2626,11 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock down_read(&clp->cl_sem); nlo.clientid = clp->cl_clientid; down(&state->lock_sema); - lsp = nfs4_find_lock_state(state, request->fl_owner); - if (lsp) - nlo.id = lsp->ls_id; - else { - spin_lock(&clp->cl_lock); - nlo.id = nfs4_alloc_lockowner_id(clp); - spin_unlock(&clp->cl_lock); - } + status = nfs4_set_lock_state(state, request); + if (status != 0) + goto out; + lsp = request->fl_u.nfs4_fl.owner; + nlo.id = lsp->ls_id; arg.u.lockt = &nlo; status = rpc_call_sync(server->client, &msg, 0); if (!status) 
{ @@ -2654,8 +2651,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock request->fl_pid = 0; status = 0; } - if (lsp) - nfs4_put_lock_state(lsp); +out: up(&state->lock_sema); up_read(&clp->cl_sem); return status; @@ -2715,28 +2711,26 @@ static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock }; struct nfs4_lock_state *lsp; struct nfs_locku_opargs luargs; - int status = 0; + int status; down_read(&clp->cl_sem); down(&state->lock_sema); - lsp = nfs4_find_lock_state(state, request->fl_owner); - if (!lsp) + status = nfs4_set_lock_state(state, request); + if (status != 0) goto out; + lsp = request->fl_u.nfs4_fl.owner; /* We might have lost the locks! */ - if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) { - luargs.seqid = lsp->ls_seqid; - memcpy(&luargs.stateid, &lsp->ls_stateid, sizeof(luargs.stateid)); - arg.u.locku = &luargs; - status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - nfs4_increment_lock_seqid(status, lsp); - } + if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) + goto out; + luargs.seqid = lsp->ls_seqid; + memcpy(&luargs.stateid, &lsp->ls_stateid, sizeof(luargs.stateid)); + arg.u.locku = &luargs; + status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); + nfs4_increment_lock_seqid(status, lsp); - if (status == 0) { + if (status == 0) memcpy(&lsp->ls_stateid, &res.u.stateid, sizeof(lsp->ls_stateid)); - nfs4_notify_unlck(state, request, lsp); - } - nfs4_put_lock_state(lsp); out: up(&state->lock_sema); if (status == 0) @@ -2762,7 +2756,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r { struct inode *inode = state->inode; struct nfs_server *server = NFS_SERVER(inode); - struct nfs4_lock_state *lsp; + struct nfs4_lock_state *lsp = request->fl_u.nfs4_fl.owner; struct nfs_lockargs arg = { .fh = NFS_FH(inode), .type = nfs4_lck_type(cmd, request), @@ -2784,9 +2778,6 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r }; 
int status; - lsp = nfs4_get_lock_state(state, request->fl_owner); - if (lsp == NULL) - return -ENOMEM; if (!(lsp->ls_flags & NFS_LOCK_INITIALIZED)) { struct nfs4_state_owner *owner = state->owner; struct nfs_open_to_lock otl = { @@ -2808,27 +2799,26 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r * seqid mutating errors */ nfs4_increment_seqid(status, owner); up(&owner->so_sema); + if (status == 0) { + lsp->ls_flags |= NFS_LOCK_INITIALIZED; + lsp->ls_seqid++; + } } else { struct nfs_exist_lock el = { .seqid = lsp->ls_seqid, }; memcpy(&el.stateid, &lsp->ls_stateid, sizeof(el.stateid)); largs.u.exist_lock = &el; - largs.new_lock_owner = 0; arg.u.lock = &largs; status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); + /* increment seqid on success, and * seqid mutating errors*/ + nfs4_increment_lock_seqid(status, lsp); } - /* increment seqid on success, and * seqid mutating errors*/ - nfs4_increment_lock_seqid(status, lsp); /* save the returned stateid. */ - if (status == 0) { + if (status == 0) memcpy(&lsp->ls_stateid, &res.u.stateid, sizeof(nfs4_stateid)); - lsp->ls_flags |= NFS_LOCK_INITIALIZED; - if (!reclaim) - nfs4_notify_setlk(state, request, lsp); - } else if (status == -NFS4ERR_DENIED) + else if (status == -NFS4ERR_DENIED) status = -EAGAIN; - nfs4_put_lock_state(lsp); return status; } @@ -2869,7 +2859,9 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock down_read(&clp->cl_sem); down(&state->lock_sema); - status = _nfs4_do_setlk(state, cmd, request, 0); + status = nfs4_set_lock_state(state, request); + if (status == 0) + status = _nfs4_do_setlk(state, cmd, request, 0); up(&state->lock_sema); if (status == 0) { /* Note: we always want to sleep here!
*/ @@ -2927,7 +2919,6 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) if (signalled()) break; } while(status < 0); - return status; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 591ad1d51880..afe587d82f1e 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -360,7 +360,7 @@ nfs4_alloc_open_state(void) atomic_set(&state->count, 1); INIT_LIST_HEAD(&state->lock_states); init_MUTEX(&state->lock_sema); - rwlock_init(&state->state_lock); + spin_lock_init(&state->state_lock); return state; } @@ -542,16 +542,6 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) return NULL; } -struct nfs4_lock_state * -nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) -{ - struct nfs4_lock_state *lsp; - read_lock(&state->state_lock); - lsp = __nfs4_find_lock_state(state, fl_owner); - read_unlock(&state->state_lock); - return lsp; -} - /* * Return a compatible lock_state. If no initialized lock_state structure * exists, return an uninitialized one. 
@@ -568,14 +558,13 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f return NULL; lsp->ls_flags = 0; lsp->ls_seqid = 0; /* arbitrary */ - lsp->ls_id = -1; memset(lsp->ls_stateid.data, 0, sizeof(lsp->ls_stateid.data)); atomic_set(&lsp->ls_count, 1); lsp->ls_owner = fl_owner; - INIT_LIST_HEAD(&lsp->ls_locks); spin_lock(&clp->cl_lock); lsp->ls_id = nfs4_alloc_lockowner_id(clp); spin_unlock(&clp->cl_lock); + INIT_LIST_HEAD(&lsp->ls_locks); return lsp; } @@ -585,121 +574,112 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f * * The caller must be holding state->lock_sema and clp->cl_sem */ -struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner) +static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner) { - struct nfs4_lock_state * lsp; + struct nfs4_lock_state *lsp, *new = NULL; - lsp = nfs4_find_lock_state(state, owner); - if (lsp == NULL) - lsp = nfs4_alloc_lock_state(state, owner); + for(;;) { + spin_lock(&state->state_lock); + lsp = __nfs4_find_lock_state(state, owner); + if (lsp != NULL) + break; + if (new != NULL) { + new->ls_state = state; + list_add(&new->ls_locks, &state->lock_states); + set_bit(LK_STATE_IN_USE, &state->flags); + lsp = new; + new = NULL; + break; + } + spin_unlock(&state->state_lock); + new = nfs4_alloc_lock_state(state, owner); + if (new == NULL) + return NULL; + } + spin_unlock(&state->state_lock); + kfree(new); return lsp; } /* - * Byte-range lock aware utility to initialize the stateid of read/write - * requests. 
+ * Release reference to lock_state, and free it if we see that + * it is no longer in use */ -void -nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner) +static void nfs4_put_lock_state(struct nfs4_lock_state *lsp) { - if (test_bit(LK_STATE_IN_USE, &state->flags)) { - struct nfs4_lock_state *lsp; + struct nfs4_state *state; - lsp = nfs4_find_lock_state(state, fl_owner); - if (lsp) { - memcpy(dst, &lsp->ls_stateid, sizeof(*dst)); - nfs4_put_lock_state(lsp); - return; - } - } - memcpy(dst, &state->stateid, sizeof(*dst)); + if (lsp == NULL) + return; + state = lsp->ls_state; + if (!atomic_dec_and_lock(&lsp->ls_count, &state->state_lock)) + return; + list_del(&lsp->ls_locks); + if (list_empty(&state->lock_states)) + clear_bit(LK_STATE_IN_USE, &state->flags); + spin_unlock(&state->state_lock); + kfree(lsp); } -/* -* Called with state->lock_sema and clp->cl_sem held. -*/ -void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp) +static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src) { - if (status == NFS_OK || seqid_mutating_err(-status)) - lsp->ls_seqid++; -} + struct nfs4_lock_state *lsp = src->fl_u.nfs4_fl.owner; -/* -* Check to see if the request lock (type FL_UNLK) effects the fl lock. -* -* fl and request must have the same posix owner -* -* return: -* 0 -> fl not effected by request -* 1 -> fl consumed by request -*/ + dst->fl_u.nfs4_fl.owner = lsp; + atomic_inc(&lsp->ls_count); +} -static int -nfs4_check_unlock(struct file_lock *fl, struct file_lock *request) +static void nfs4_fl_release_lock(struct file_lock *fl) { - if (fl->fl_start >= request->fl_start && fl->fl_end <= request->fl_end) - return 1; - return 0; + nfs4_put_lock_state(fl->fl_u.nfs4_fl.owner); } -/* - * Post an initialized lock_state on the state->lock_states list. 
- */ -void nfs4_notify_setlk(struct nfs4_state *state, struct file_lock *request, struct nfs4_lock_state *lsp) +static struct file_lock_operations nfs4_fl_lock_ops = { + .fl_copy_lock = nfs4_fl_copy_lock, + .fl_release_private = nfs4_fl_release_lock, +}; + +int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl) { - if (!list_empty(&lsp->ls_locks)) - return; - atomic_inc(&lsp->ls_count); - write_lock(&state->state_lock); - list_add(&lsp->ls_locks, &state->lock_states); - set_bit(LK_STATE_IN_USE, &state->flags); - write_unlock(&state->state_lock); + struct nfs4_lock_state *lsp; + + if (fl->fl_ops != NULL) + return 0; + lsp = nfs4_get_lock_state(state, fl->fl_owner); + if (lsp == NULL) + return -ENOMEM; + fl->fl_u.nfs4_fl.owner = lsp; + fl->fl_ops = &nfs4_fl_lock_ops; + return 0; } -/* - * to decide to 'reap' lock state: - * 1) search i_flock for file_locks with fl.lock_state = to ls. - * 2) determine if unlock will consume found lock. - * if so, reap - * - * else, don't reap. - * +/* + * Byte-range lock aware utility to initialize the stateid of read/write + * requests. 
*/ -void -nfs4_notify_unlck(struct nfs4_state *state, struct file_lock *request, struct nfs4_lock_state *lsp) +void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner) { - struct inode *inode = state->inode; - struct file_lock *fl; + struct nfs4_lock_state *lsp; - for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { - if (!(fl->fl_flags & FL_POSIX)) - continue; - if (fl->fl_owner != lsp->ls_owner) - continue; - /* Exit if we find at least one lock which is not consumed */ - if (nfs4_check_unlock(fl,request) == 0) - return; - } + memcpy(dst, &state->stateid, sizeof(*dst)); + if (test_bit(LK_STATE_IN_USE, &state->flags) == 0) + return; - write_lock(&state->state_lock); - list_del_init(&lsp->ls_locks); - if (list_empty(&state->lock_states)) - clear_bit(LK_STATE_IN_USE, &state->flags); - write_unlock(&state->state_lock); + spin_lock(&state->state_lock); + lsp = __nfs4_find_lock_state(state, fl_owner); + if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) + memcpy(dst, &lsp->ls_stateid, sizeof(*dst)); + spin_unlock(&state->state_lock); nfs4_put_lock_state(lsp); } /* - * Release reference to lock_state, and free it if we see that - * it is no longer in use - */ -void -nfs4_put_lock_state(struct nfs4_lock_state *lsp) +* Called with state->lock_sema and clp->cl_sem held. 
+*/ +void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp) { - if (!atomic_dec_and_test(&lsp->ls_count)) - return; - BUG_ON (!list_empty(&lsp->ls_locks)); - kfree(lsp); + if (status == NFS_OK || seqid_mutating_err(-status)) + lsp->ls_seqid++; } /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 9b8b696d4f15..e5a8db00df29 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -674,6 +674,7 @@ struct file_lock { struct lock_manager_operations *fl_lmops; /* Callbacks for lockmanagers */ union { struct nfs_lock_info nfs_fl; + struct nfs4_lock_info nfs4_fl; } fl_u; }; diff --git a/include/linux/nfs_fs_i.h b/include/linux/nfs_fs_i.h index e9a749588a7b..e2c18dabff86 100644 --- a/include/linux/nfs_fs_i.h +++ b/include/linux/nfs_fs_i.h @@ -16,6 +16,11 @@ struct nfs_lock_info { struct nlm_lockowner *owner; }; +struct nfs4_lock_state; +struct nfs4_lock_info { + struct nfs4_lock_state *owner; +}; + /* * Lock flag values */ -- cgit v1.2.3-59-g8ed1b From 97d312d037e63e7c8ac004ffe3072f82a6d45495 Mon Sep 17 00:00:00 2001 From: Manoj Naik Date: Wed, 22 Jun 2005 17:16:39 +0000 Subject: [PATCH] NFSv4: add support for rdattr_error in NFSv4 readdir requests. Request RDATTR_ERROR as an attribute in readdir to distinguish between a directory being within an absent filesystem or one (or more) of its entries. 
Signed-off-by: Manoj Naik Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 325cd6d4f23a..4d655d252c6d 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1014,6 +1014,10 @@ static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args) static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req) { struct rpc_auth *auth = req->rq_task->tk_auth; + uint32_t attrs[2] = { + FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID, + FATTR4_WORD1_MOUNTED_ON_FILEID, + }; int replen; uint32_t *p; @@ -1024,13 +1028,13 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg WRITE32(readdir->count >> 1); /* We're not doing readdirplus */ WRITE32(readdir->count); WRITE32(2); - if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) { - WRITE32(0); - WRITE32(FATTR4_WORD1_MOUNTED_ON_FILEID); - } else { - WRITE32(FATTR4_WORD0_FILEID); - WRITE32(0); - } + /* Switch to mounted_on_fileid if the server supports it */ + if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) + attrs[0] &= ~FATTR4_WORD0_FILEID; + else + attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; + WRITE32(attrs[0] & readdir->bitmask[0]); + WRITE32(attrs[1] & readdir->bitmask[1]); /* set up reply kvec * toplevel_status + taglen + rescount + OP_PUTFH + status @@ -4060,6 +4064,12 @@ uint32_t *nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus) } len = XDR_QUADLEN(ntohl(*p++)); /* attribute buffer length */ if (len > 0) { + if (bitmap[0] & FATTR4_WORD0_RDATTR_ERROR) { + bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR; + /* Ignore the return value of rdattr_error for now */ + p++; + len--; + } if (bitmap[0] == 0 && bitmap[1] == FATTR4_WORD1_MOUNTED_ON_FILEID) xdr_decode_hyper(p, &entry->ino); else if (bitmap[0] == FATTR4_WORD0_FILEID) -- cgit v1.2.3-59-g8ed1b From 
6ebf3656fd18430d90fbb3199b31d08178c37134 Mon Sep 17 00:00:00 2001 From: Manoj Naik Date: Wed, 22 Jun 2005 17:16:39 +0000 Subject: [PATCH] NFSv4: Map a couple of NFSv4 errors to EINVAL. This shows up on running tar over NFSv4. Signed-off-by: Manoj Naik Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 4d655d252c6d..577b4429c8f6 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4109,6 +4109,8 @@ static struct { { NFS4ERR_DQUOT, EDQUOT }, { NFS4ERR_STALE, ESTALE }, { NFS4ERR_BADHANDLE, EBADHANDLE }, + { NFS4ERR_BADOWNER, EINVAL }, + { NFS4ERR_BADNAME, EINVAL }, { NFS4ERR_BAD_COOKIE, EBADCOOKIE }, { NFS4ERR_NOTSUPP, ENOTSUPP }, { NFS4ERR_TOOSMALL, ETOOSMALL }, -- cgit v1.2.3-59-g8ed1b From eadf4598e7ec37a234e70e965bd335860e58bda4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 Jun 2005 17:16:39 +0000 Subject: [PATCH] NFS: Add debugging code to NFSv4 readdir Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 5 +++++ fs/nfs/nfs4xdr.c | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0ddc20102d46..1b76f80aedb9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1722,6 +1722,10 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, }; int status; + dprintk("%s: dentry = %s/%s, cookie = %Lu\n", __FUNCTION__, + dentry->d_parent->d_name.name, + dentry->d_name.name, + (unsigned long long)cookie); lock_kernel(); nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args); res.pgbase = args.pgbase; @@ -1729,6 +1733,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, if (status == 0) memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); unlock_kernel(); + dprintk("%s: returns %d\n", __FUNCTION__, status); return status; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 577b4429c8f6..6c564ef9489e 100644 --- 
a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1035,6 +1035,13 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; WRITE32(attrs[0] & readdir->bitmask[0]); WRITE32(attrs[1] & readdir->bitmask[1]); + dprintk("%s: cookie = %Lu, verifier = 0x%x%x, bitmap = 0x%x%x\n", + __FUNCTION__, + (unsigned long long)readdir->cookie, + ((u32 *)readdir->verifier.data)[0], + ((u32 *)readdir->verifier.data)[1], + attrs[0] & readdir->bitmask[0], + attrs[1] & readdir->bitmask[1]); /* set up reply kvec * toplevel_status + taglen + rescount + OP_PUTFH + status @@ -1043,6 +1050,9 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg replen = (RPC_REPHDRSIZE + auth->au_rslack + 9) << 2; xdr_inline_pages(&req->rq_rcv_buf, replen, readdir->pages, readdir->pgbase, readdir->count); + dprintk("%s: inlined page args = (%u, %p, %u, %u)\n", + __FUNCTION__, replen, readdir->pages, + readdir->pgbase, readdir->count); return 0; } @@ -3066,6 +3076,11 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n return status; READ_BUF(8); COPYMEM(readdir->verifier.data, 8); + dprintk("%s: verifier = 0x%x%x\n", + __FUNCTION__, + ((u32 *)readdir->verifier.data)[0], + ((u32 *)readdir->verifier.data)[1]); + hdrlen = (char *) p - (char *) iov->iov_base; recvd = rcvbuf->len - hdrlen; @@ -3080,12 +3095,14 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n for (nr = 0; *p++; nr++) { if (p + 3 > end) goto short_pkt; + dprintk("cookie = %Lu, ", *((unsigned long long *)p)); p += 2; /* cookie */ len = ntohl(*p++); /* filename length */ if (len > NFS4_MAXNAMLEN) { printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)\n", len); goto err_unmap; } + dprintk("filename = %*s\n", len, (char *)p); p += XDR_QUADLEN(len); if (p + 1 > end) goto short_pkt; @@ -3105,6 +3122,7 @@ out: kunmap_atomic(kaddr, KM_USER0); return 0; short_pkt: + dprintk("%s: short 
packet at entry %d\n", __FUNCTION__, nr); entry[0] = entry[1] = 0; /* truncate listing ? */ if (!nr) { -- cgit v1.2.3-59-g8ed1b