diff options
Diffstat (limited to '')
42 files changed, 10654 insertions, 6920 deletions
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index f368f3215f88..f6a2fd3015e7 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -35,18 +35,9 @@ config NFSD_V2_ACL bool depends on NFSD -config NFSD_V3 - bool "NFS server support for NFS version 3" - depends on NFSD - help - This option enables support in your system's NFS server for - version 3 of the NFS protocol (RFC 1813). - - If unsure, say Y. - config NFSD_V3_ACL bool "NFS server support for the NFSv3 ACL protocol extension" - depends on NFSD_V3 + depends on NFSD select NFSD_V2_ACL help Solaris NFS servers support an auxiliary NFSv3 ACL protocol that @@ -70,12 +61,13 @@ config NFSD_V3_ACL config NFSD_V4 bool "NFS server support for NFS version 4" depends on NFSD && PROC_FS - select NFSD_V3 select FS_POSIX_ACL select SUNRPC_GSS + select CRYPTO select CRYPTO_MD5 select CRYPTO_SHA256 select GRACE_PERIOD + select NFS_V4_2_SSC_HELPER if NFS_V4_2 help This option enables support in your system's NFS server for version 4 of the NFS protocol (RFC 3530). @@ -97,7 +89,7 @@ config NFSD_BLOCKLAYOUT help This option enables support for the exporting pNFS block layouts in the kernel's NFS server. The pNFS block layout enables NFS - clients to directly perform I/O to block devices accesible to both + clients to directly perform I/O to block devices accessible to both the server and the clients. See RFC 5663 for more details. If unsure, say N. @@ -107,11 +99,10 @@ config NFSD_SCSILAYOUT depends on NFSD_V4 && BLOCK select NFSD_PNFS select EXPORTFS_BLOCK_OPS - select BLK_SCSI_REQUEST help This option enables support for the exporting pNFS SCSI layouts in the kernel's NFS server. The pNFS SCSI layout enables NFS - clients to directly perform I/O to SCSI devices accesible to both + clients to directly perform I/O to SCSI devices accessible to both the server and the clients. See draft-ietf-nfsv4-scsi-layout for more details. @@ -125,7 +116,7 @@ config NFSD_FLEXFILELAYOUT This option enables support for the exporting pNFS Flex File layouts in the kernel's NFS server. The pNFS Flex File layout enables NFS clients to directly perform I/O to NFSv3 devices - accesible to both the server and the clients. See + accessible to both the server and the clients. See draft-ietf-nfsv4-flex-files for more details. Warning, this server implements the bare minimum functionality @@ -136,7 +127,7 @@ config NFSD_FLEXFILELAYOUT config NFSD_V4_2_INTER_SSC bool "NFSv4.2 inter server to server COPY" - depends on NFSD_V4 && NFS_V4_1 && NFS_V4_2 + depends on NFSD_V4 && NFS_V4_2 help This option enables support for NFSv4.2 inter server to server copy where the destination server calls the NFSv4.2 @@ -156,13 +147,3 @@ config NFSD_V4_SECURITY_LABEL If you do not wish to enable fine-grained security labels SELinux or Smack policies on NFSv4 files, say N. - -config NFSD_FAULT_INJECTION - bool "NFS server manual fault injection" - depends on NFSD_V4 && DEBUG_KERNEL && DEBUG_FS && BROKEN - help - This option enables support for manually injecting faults - into the NFS server. This is intended to be used for - testing error recovery on the NFS client. - - If unsure, say N. diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile index 6a40b1afe703..805c06d5f1b4 100644 --- a/fs/nfsd/Makefile +++ b/fs/nfsd/Makefile @@ -12,10 +12,8 @@ nfsd-y += trace.o nfsd-y += nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ export.o auth.o lockd.o nfscache.o nfsxdr.o \ - stats.o filecache.o -nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o + stats.o filecache.o nfs3proc.o nfs3xdr.o nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o -nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ nfs4acl.o nfs4callback.o nfs4recover.o diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h index ba14d2f4b64f..4b7324458a94 100644 --- a/fs/nfsd/acl.h +++ b/fs/nfsd/acl.h @@ -38,6 +38,8 @@ struct nfs4_acl; struct svc_fh; struct svc_rqst; +struct nfsd_attrs; +enum nfs_ftype4; int nfs4_acl_bytes(int entries); int nfs4_acl_get_whotype(char *, u32); @@ -45,7 +47,7 @@ __be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who); int nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_acl **acl); -__be32 nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct nfs4_acl *acl); +__be32 nfsd4_acl_to_attr(enum nfs_ftype4 type, struct nfs4_acl *acl, + struct nfsd_attrs *attr); #endif /* LINUX_NFS4_ACL_H */ diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index 9bbaa671c079..b6d01d51a746 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -4,14 +4,10 @@ */ #include <linux/exportfs.h> #include <linux/iomap.h> -#include <linux/genhd.h> #include <linux/slab.h> #include <linux/pr.h> #include <linux/nfsd/debug.h> -#include <scsi/scsi_proto.h> -#include <scsi/scsi_common.h> -#include <scsi/scsi_request.h> #include "blocklayoutxdr.h" #include "pnfs.h" @@ -83,13 +79,13 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp, bex->soff = iomap.addr; break; } - /*FALLTHRU*/ + fallthrough; case IOMAP_HOLE: if (seg->iomode == IOMODE_READ) { bex->es = PNFS_BLOCK_NONE_DATA; break; } - /*FALLTHRU*/ + fallthrough; case IOMAP_DELALLOC: default: WARN(1, "pnfsd: filesystem returned %d extent\n", iomap.type); @@ -170,7 +166,7 @@ nfsd4_block_proc_getdeviceinfo(struct super_block *sb, struct nfs4_client *clp, struct nfsd4_getdeviceinfo *gdp) { - if (sb->s_bdev != sb->s_bdev->bd_contains) + if (bdev_is_partition(sb->s_bdev)) return nfserr_inval; return nfserrno(nfsd4_block_get_device_info_simple(sb, gdp)); } @@ -211,109 +207,6 @@ const struct nfsd4_layout_ops bl_layout_ops = { #endif /* CONFIG_NFSD_BLOCKLAYOUT */ #ifdef CONFIG_NFSD_SCSILAYOUT -static int nfsd4_scsi_identify_device(struct block_device *bdev, - struct pnfs_block_volume *b) -{ - struct request_queue *q = bdev->bd_disk->queue; - struct request *rq; - struct scsi_request *req; - /* - * The allocation length (passed in bytes 3 and 4 of the INQUIRY - * command descriptor block) specifies the number of bytes that have - * been allocated for the data-in buffer. - * 252 is the highest one-byte value that is a multiple of 4. - * 65532 is the highest two-byte value that is a multiple of 4. - */ - size_t bufflen = 252, maxlen = 65532, len, id_len; - u8 *buf, *d, type, assoc; - int retries = 1, error; - - if (WARN_ON_ONCE(!blk_queue_scsi_passthrough(q))) - return -EINVAL; - -again: - buf = kzalloc(bufflen, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - rq = blk_get_request(q, REQ_OP_SCSI_IN, 0); - if (IS_ERR(rq)) { - error = -ENOMEM; - goto out_free_buf; - } - req = scsi_req(rq); - - error = blk_rq_map_kern(q, rq, buf, bufflen, GFP_KERNEL); - if (error) - goto out_put_request; - - req->cmd[0] = INQUIRY; - req->cmd[1] = 1; - req->cmd[2] = 0x83; - req->cmd[3] = bufflen >> 8; - req->cmd[4] = bufflen & 0xff; - req->cmd_len = COMMAND_SIZE(INQUIRY); - - blk_execute_rq(rq->q, NULL, rq, 1); - if (req->result) { - pr_err("pNFS: INQUIRY 0x83 failed with: %x\n", - req->result); - error = -EIO; - goto out_put_request; - } - - len = (buf[2] << 8) + buf[3] + 4; - if (len > bufflen) { - if (len <= maxlen && retries--) { - blk_put_request(rq); - kfree(buf); - bufflen = len; - goto again; - } - pr_err("pNFS: INQUIRY 0x83 response invalid (len = %zd)\n", - len); - goto out_put_request; - } - - d = buf + 4; - for (d = buf + 4; d < buf + len; d += id_len + 4) { - id_len = d[3]; - type = d[1] & 0xf; - assoc = (d[1] >> 4) & 0x3; - - /* - * We only care about a EUI-64 and NAA designator types - * with LU association. - */ - if (assoc != 0x00) - continue; - if (type != 0x02 && type != 0x03) - continue; - if (id_len != 8 && id_len != 12 && id_len != 16) - continue; - - b->scsi.code_set = PS_CODE_SET_BINARY; - b->scsi.designator_type = type == 0x02 ? - PS_DESIGNATOR_EUI64 : PS_DESIGNATOR_NAA; - b->scsi.designator_len = id_len; - memcpy(b->scsi.designator, d + 4, id_len); - - /* - * If we found a 8 or 12 byte descriptor continue on to - * see if a 16 byte one is available. If we find a - * 16 byte descriptor we're done. - */ - if (id_len == 16) - break; - } - -out_put_request: - blk_put_request(rq); -out_free_buf: - kfree(buf); - return error; -} - #define NFSD_MDS_PR_KEY 0x0100000000000000ULL /* @@ -325,6 +218,31 @@ static u64 nfsd4_scsi_pr_key(struct nfs4_client *clp) return ((u64)clp->cl_clientid.cl_boot << 32) | clp->cl_clientid.cl_id; } +static const u8 designator_types[] = { + PS_DESIGNATOR_EUI64, + PS_DESIGNATOR_NAA, +}; + +static int +nfsd4_block_get_unique_id(struct gendisk *disk, struct pnfs_block_volume *b) +{ + int ret, i; + + for (i = 0; i < ARRAY_SIZE(designator_types); i++) { + u8 type = designator_types[i]; + + ret = disk->fops->get_unique_id(disk, b->scsi.designator, type); + if (ret > 0) { + b->scsi.code_set = PS_CODE_SET_BINARY; + b->scsi.designator_type = type; + b->scsi.designator_len = ret; + return 0; + } + } + + return -EINVAL; +} + static int nfsd4_block_get_device_info_scsi(struct super_block *sb, struct nfs4_client *clp, @@ -333,7 +251,7 @@ nfsd4_block_get_device_info_scsi(struct super_block *sb, struct pnfs_block_deviceaddr *dev; struct pnfs_block_volume *b; const struct pr_ops *ops; - int error; + int ret; dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) + sizeof(struct pnfs_block_volume), GFP_KERNEL); @@ -347,33 +265,38 @@ nfsd4_block_get_device_info_scsi(struct super_block *sb, b->type = PNFS_BLOCK_VOLUME_SCSI; b->scsi.pr_key = nfsd4_scsi_pr_key(clp); - error = nfsd4_scsi_identify_device(sb->s_bdev, b); - if (error) - return error; + ret = nfsd4_block_get_unique_id(sb->s_bdev->bd_disk, b); + if (ret < 0) + goto out_free_dev; + ret = -EINVAL; ops = sb->s_bdev->bd_disk->fops->pr_ops; if (!ops) { pr_err("pNFS: device %s does not support PRs.\n", sb->s_id); - return -EINVAL; + goto out_free_dev; } - error = ops->pr_register(sb->s_bdev, 0, NFSD_MDS_PR_KEY, true); - if (error) { + ret = ops->pr_register(sb->s_bdev, 0, NFSD_MDS_PR_KEY, true); + if (ret) { pr_err("pNFS: failed to register key for device %s.\n", sb->s_id); - return -EINVAL; + goto out_free_dev; } - error = ops->pr_reserve(sb->s_bdev, NFSD_MDS_PR_KEY, + ret = ops->pr_reserve(sb->s_bdev, NFSD_MDS_PR_KEY, PR_EXCLUSIVE_ACCESS_REG_ONLY, 0); - if (error) { + if (ret) { pr_err("pNFS: failed to reserve device %s.\n", sb->s_id); - return -EINVAL; + goto out_free_dev; } return 0; + +out_free_dev: + kfree(dev); + return ret; } static __be32 @@ -382,7 +305,7 @@ nfsd4_scsi_proc_getdeviceinfo(struct super_block *sb, struct nfs4_client *clp, struct nfsd4_getdeviceinfo *gdp) { - if (sb->s_bdev != sb->s_bdev->bd_contains) + if (bdev_is_partition(sb->s_bdev)) return nfserr_inval; return nfserrno(nfsd4_block_get_device_info_scsi(sb, clp, gdp)); } diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index 10ec5ecdf117..f21259ead64b 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h @@ -78,10 +78,12 @@ enum { /* Checksum this amount of the request */ #define RC_CSUMLEN (256U) +int nfsd_drc_slab_create(void); +void nfsd_drc_slab_free(void); int nfsd_reply_cache_init(struct nfsd_net *); void nfsd_reply_cache_shutdown(struct nfsd_net *); int nfsd_cache_lookup(struct svc_rqst *); void nfsd_cache_update(struct svc_rqst *, int, __be32 *); -int nfsd_reply_cache_stats_open(struct inode *, struct file *); +int nfsd_reply_cache_stats_show(struct seq_file *m, void *v); #endif /* NFSCACHE_H */ diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 15422c951fd1..668c7527b17e 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -23,6 +23,7 @@ #include "netns.h" #include "pnfs.h" #include "filecache.h" +#include "trace.h" #define NFSDDBG_FACILITY NFSDDBG_EXPORT @@ -50,6 +51,11 @@ static void expkey_put(struct kref *ref) kfree_rcu(key, ek_rcu); } +static int expkey_upcall(struct cache_detail *cd, struct cache_head *h) +{ + return sunrpc_cache_pipe_upcall(cd, h); +} + static void expkey_request(struct cache_detail *cd, struct cache_head *h, char **bpp, int *blen) @@ -140,7 +146,9 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) if (len == 0) { set_bit(CACHE_NEGATIVE, &key.h.flags); ek = svc_expkey_update(cd, &key, ek); - if (!ek) + if (ek) + trace_nfsd_expkey_update(ek, NULL); + else err = -ENOMEM; } else { err = kern_path(buf, 0, &key.ek_path); @@ -150,7 +158,9 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) dprintk("Found the path %s\n", buf); ek = svc_expkey_update(cd, &key, ek); - if (!ek) + if (ek) + trace_nfsd_expkey_update(ek, buf); + else err = -ENOMEM; path_put(&key.ek_path); } @@ -249,6 +259,7 @@ static const struct cache_detail svc_expkey_cache_template = { .hash_size = EXPKEY_HASHMAX, .name = "nfsd.fh", .cache_put = expkey_put, + .cache_upcall = expkey_upcall, .cache_request = expkey_request, .cache_parse = expkey_parse, .cache_show = expkey_show, @@ -320,16 +331,38 @@ static void nfsd4_fslocs_free(struct nfsd4_fs_locations *fsloc) fsloc->locations = NULL; } +static int export_stats_init(struct export_stats *stats) +{ + stats->start_time = ktime_get_seconds(); + return nfsd_percpu_counters_init(stats->counter, EXP_STATS_COUNTERS_NUM); +} + +static void export_stats_reset(struct export_stats *stats) +{ + nfsd_percpu_counters_reset(stats->counter, EXP_STATS_COUNTERS_NUM); +} + +static void export_stats_destroy(struct export_stats *stats) +{ + nfsd_percpu_counters_destroy(stats->counter, EXP_STATS_COUNTERS_NUM); +} + static void svc_export_put(struct kref *ref) { struct svc_export *exp = container_of(ref, struct svc_export, h.ref); path_put(&exp->ex_path); auth_domain_put(exp->ex_client); nfsd4_fslocs_free(&exp->ex_fslocs); + export_stats_destroy(&exp->ex_stats); kfree(exp->ex_uuid); kfree_rcu(exp, ex_rcu); } +static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h) +{ + return sunrpc_cache_pipe_upcall(cd, h); +} + static void svc_export_request(struct cache_detail *cd, struct cache_head *h, char **bpp, int *blen) @@ -353,8 +386,9 @@ static struct svc_export *svc_export_update(struct svc_export *new, struct svc_export *old); static struct svc_export *svc_export_lookup(struct svc_export *); -static int check_export(struct inode *inode, int *flags, unsigned char *uuid) +static int check_export(struct path *path, int *flags, unsigned char *uuid) { + struct inode *inode = d_inode(path->dentry); /* * We currently export only dirs, regular files, and (for v4 @@ -378,6 +412,7 @@ static int check_export(struct inode *inode, int *flags, unsigned char *uuid) * or an FSID number (so NFSEXP_FSID or ->uuid is needed). * 2: We must be able to find an inode from a filehandle. * This means that s_export_op must be set. + * 3: We must not currently be on an idmapped mount. */ if (!(inode->i_sb->s_type->fs_flags & FS_REQUIRES_DEV) && !(*flags & NFSEXP_FSID) && @@ -392,6 +427,17 @@ static int check_export(struct inode *inode, int *flags, unsigned char *uuid) return -EINVAL; } + if (is_idmapped_mnt(path->mnt)) { + dprintk("exp_export: export of idmapped mounts not yet supported.\n"); + return -EINVAL; + } + + if (inode->i_sb->s_export_op->flags & EXPORT_OP_NOSUBTREECHK && + !(*flags & NFSEXP_NOSUBTREECHECK)) { + dprintk("%s: %s does not support subtree checking!\n", + __func__, inode->i_sb->s_type->name); + return -EINVAL; + } return 0; } @@ -614,8 +660,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) goto out4; } - err = check_export(d_inode(exp.ex_path.dentry), &exp.ex_flags, - exp.ex_uuid); + err = check_export(&exp.ex_path, &exp.ex_flags, exp.ex_uuid); if (err) goto out4; /* @@ -643,15 +688,17 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) } expp = svc_export_lookup(&exp); - if (expp) - expp = svc_export_update(&exp, expp); - else + if (!expp) { err = -ENOMEM; - cache_flush(); - if (expp == NULL) - err = -ENOMEM; - else + goto out4; + } + expp = svc_export_update(&exp, expp); + if (expp) { + trace_nfsd_export_update(expp); + cache_flush(); exp_put(expp); + } else + err = -ENOMEM; out4: nfsd4_fslocs_free(&exp.ex_fslocs); kfree(exp.ex_uuid); @@ -668,22 +715,47 @@ static void exp_flags(struct seq_file *m, int flag, int fsid, kuid_t anonu, kgid_t anong, struct nfsd4_fs_locations *fslocs); static void show_secinfo(struct seq_file *m, struct svc_export *exp); +static int is_export_stats_file(struct seq_file *m) +{ + /* + * The export_stats file uses the same ops as the exports file. + * We use the file's name to determine the reported info per export. + * There is no rename in nsfdfs, so d_name.name is stable. + */ + return !strcmp(m->file->f_path.dentry->d_name.name, "export_stats"); +} + static int svc_export_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h) { - struct svc_export *exp ; + struct svc_export *exp; + bool export_stats = is_export_stats_file(m); - if (h ==NULL) { - seq_puts(m, "#path domain(flags)\n"); + if (h == NULL) { + if (export_stats) + seq_puts(m, "#path domain start-time\n#\tstats\n"); + else + seq_puts(m, "#path domain(flags)\n"); return 0; } exp = container_of(h, struct svc_export, h); seq_path(m, &exp->ex_path, " \t\n\\"); seq_putc(m, '\t'); seq_escape(m, exp->ex_client->name, " \t\n\\"); + if (export_stats) { + seq_printf(m, "\t%lld\n", exp->ex_stats.start_time); + seq_printf(m, "\tfh_stale: %lld\n", + percpu_counter_sum_positive(&exp->ex_stats.counter[EXP_STATS_FH_STALE])); + seq_printf(m, "\tio_read: %lld\n", + percpu_counter_sum_positive(&exp->ex_stats.counter[EXP_STATS_IO_READ])); + seq_printf(m, "\tio_write: %lld\n", + percpu_counter_sum_positive(&exp->ex_stats.counter[EXP_STATS_IO_WRITE])); + seq_putc(m, '\n'); + return 0; + } seq_putc(m, '('); - if (test_bit(CACHE_VALID, &h->flags) && + if (test_bit(CACHE_VALID, &h->flags) && !test_bit(CACHE_NEGATIVE, &h->flags)) { exp_flags(m, exp->ex_flags, exp->ex_fsid, exp->ex_anon_uid, exp->ex_anon_gid, &exp->ex_fslocs); @@ -724,6 +796,7 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem) new->ex_layout_types = 0; new->ex_uuid = NULL; new->cd = item->cd; + export_stats_reset(&new->ex_stats); } static void export_update(struct cache_head *cnew, struct cache_head *citem) @@ -756,10 +829,15 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem) static struct cache_head *svc_export_alloc(void) { struct svc_export *i = kmalloc(sizeof(*i), GFP_KERNEL); - if (i) - return &i->h; - else + if (!i) return NULL; + + if (export_stats_init(&i->ex_stats)) { + kfree(i); + return NULL; + } + + return &i->h; } static const struct cache_detail svc_export_cache_template = { @@ -767,6 +845,7 @@ static const struct cache_detail svc_export_cache_template = { .hash_size = EXPORT_HASHMAX, .name = "nfsd.export", .cache_put = svc_export_put, + .cache_upcall = svc_export_upcall, .cache_request = svc_export_request, .cache_parse = svc_export_parse, .cache_show = svc_export_show, @@ -832,8 +911,10 @@ exp_find_key(struct cache_detail *cd, struct auth_domain *clp, int fsid_type, if (ek == NULL) return ERR_PTR(-ENOMEM); err = cache_check(cd, &ek->h, reqp); - if (err) + if (err) { + trace_nfsd_exp_find_key(&key, err); return ERR_PTR(err); + } return ek; } @@ -855,8 +936,10 @@ exp_get_by_name(struct cache_detail *cd, struct auth_domain *clp, if (exp == NULL) return ERR_PTR(-ENOMEM); err = cache_check(cd, &exp->h, reqp); - if (err) + if (err) { + trace_nfsd_exp_get_by_name(&key, err); return ERR_PTR(err); + } return exp; } @@ -979,7 +1062,7 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp) if (nfsd4_spo_must_allow(rqstp)) return 0; - return nfserr_wrongsec; + return rqstp->rq_vers < 4 ? nfserr_acces : nfserr_wrongsec; } /* @@ -1216,10 +1299,14 @@ static int e_show(struct seq_file *m, void *p) struct cache_head *cp = p; struct svc_export *exp = container_of(cp, struct svc_export, h); struct cache_detail *cd = m->private; + bool export_stats = is_export_stats_file(m); if (p == SEQ_START_TOKEN) { seq_puts(m, "# Version 1.1\n"); - seq_puts(m, "# Path Client(Flags) # IPs\n"); + if (export_stats) + seq_puts(m, "# Path Client Start-time\n#\tStats\n"); + else + seq_puts(m, "# Path Client(Flags) # IPs\n"); return 0; } diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h index e7daa1f246f0..ee0e3aba4a6e 100644 --- a/fs/nfsd/export.h +++ b/fs/nfsd/export.h @@ -6,6 +6,7 @@ #define NFSD_EXPORT_H #include <linux/sunrpc/cache.h> +#include <linux/percpu_counter.h> #include <uapi/linux/nfsd/export.h> #include <linux/nfs4.h> @@ -46,6 +47,19 @@ struct exp_flavor_info { u32 flags; }; +/* Per-export stats */ +enum { + EXP_STATS_FH_STALE, + EXP_STATS_IO_READ, + EXP_STATS_IO_WRITE, + EXP_STATS_COUNTERS_NUM +}; + +struct export_stats { + time64_t start_time; + struct percpu_counter counter[EXP_STATS_COUNTERS_NUM]; +}; + struct svc_export { struct cache_head h; struct auth_domain * ex_client; @@ -62,6 +76,7 @@ struct svc_export { struct nfsd4_deviceid_map *ex_devid_map; struct cache_detail *cd; struct rcu_head ex_rcu; + struct export_stats ex_stats; }; /* an "export key" (expkey) maps a filehandlefragement to an diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 22e77ede9f14..ec3fceb92236 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -7,11 +7,13 @@ #include <linux/hash.h> #include <linux/slab.h> #include <linux/file.h> +#include <linux/pagemap.h> #include <linux/sched.h> #include <linux/list_lru.h> #include <linux/fsnotify_backend.h> #include <linux/fsnotify.h> #include <linux/seq_file.h> +#include <linux/rhashtable.h> #include "vfs.h" #include "nfsd.h" @@ -20,59 +22,170 @@ #include "filecache.h" #include "trace.h" -#define NFSDDBG_FACILITY NFSDDBG_FH - -/* FIXME: dynamically size this for the machine somehow? */ -#define NFSD_FILE_HASH_BITS 12 -#define NFSD_FILE_HASH_SIZE (1 << NFSD_FILE_HASH_BITS) #define NFSD_LAUNDRETTE_DELAY (2 * HZ) -#define NFSD_FILE_SHUTDOWN (1) -#define NFSD_FILE_LRU_THRESHOLD (4096UL) -#define NFSD_FILE_LRU_LIMIT (NFSD_FILE_LRU_THRESHOLD << 2) +#define NFSD_FILE_CACHE_UP (0) /* We only care about NFSD_MAY_READ/WRITE for this cache */ #define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE) -struct nfsd_fcache_bucket { - struct hlist_head nfb_head; - spinlock_t nfb_lock; - unsigned int nfb_count; - unsigned int nfb_maxcount; -}; - static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits); +static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions); +static DEFINE_PER_CPU(unsigned long, nfsd_file_releases); +static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age); +static DEFINE_PER_CPU(unsigned long, nfsd_file_pages_flushed); +static DEFINE_PER_CPU(unsigned long, nfsd_file_evictions); struct nfsd_fcache_disposal { - struct list_head list; struct work_struct work; - struct net *net; spinlock_t lock; struct list_head freeme; - struct rcu_head rcu; }; static struct workqueue_struct *nfsd_filecache_wq __read_mostly; static struct kmem_cache *nfsd_file_slab; static struct kmem_cache *nfsd_file_mark_slab; -static struct nfsd_fcache_bucket *nfsd_file_hashtbl; static struct list_lru nfsd_file_lru; -static long nfsd_file_lru_flags; +static unsigned long nfsd_file_flags; static struct fsnotify_group *nfsd_file_fsnotify_group; -static atomic_long_t nfsd_filecache_count; static struct delayed_work nfsd_filecache_laundrette; -static DEFINE_SPINLOCK(laundrette_lock); -static LIST_HEAD(laundrettes); +static struct rhashtable nfsd_file_rhash_tbl + ____cacheline_aligned_in_smp; + +enum nfsd_file_lookup_type { + NFSD_FILE_KEY_INODE, + NFSD_FILE_KEY_FULL, +}; + +struct nfsd_file_lookup_key { + struct inode *inode; + struct net *net; + const struct cred *cred; + unsigned char need; + enum nfsd_file_lookup_type type; +}; + +/* + * The returned hash value is based solely on the address of an in-code + * inode, a pointer to a slab-allocated object. The entropy in such a + * pointer is concentrated in its middle bits. + */ +static u32 nfsd_file_inode_hash(const struct inode *inode, u32 seed) +{ + unsigned long ptr = (unsigned long)inode; + u32 k; + + k = ptr >> L1_CACHE_SHIFT; + k &= 0x00ffffff; + return jhash2(&k, 1, seed); +} + +/** + * nfsd_file_key_hashfn - Compute the hash value of a lookup key + * @data: key on which to compute the hash value + * @len: rhash table's key_len parameter (unused) + * @seed: rhash table's random seed of the day + * + * Return value: + * Computed 32-bit hash value + */ +static u32 nfsd_file_key_hashfn(const void *data, u32 len, u32 seed) +{ + const struct nfsd_file_lookup_key *key = data; -static void nfsd_file_gc(void); + return nfsd_file_inode_hash(key->inode, seed); +} + +/** + * nfsd_file_obj_hashfn - Compute the hash value of an nfsd_file + * @data: object on which to compute the hash value + * @len: rhash table's key_len parameter (unused) + * @seed: rhash table's random seed of the day + * + * Return value: + * Computed 32-bit hash value + */ +static u32 nfsd_file_obj_hashfn(const void *data, u32 len, u32 seed) +{ + const struct nfsd_file *nf = data; + + return nfsd_file_inode_hash(nf->nf_inode, seed); +} + +static bool +nfsd_match_cred(const struct cred *c1, const struct cred *c2) +{ + int i; + + if (!uid_eq(c1->fsuid, c2->fsuid)) + return false; + if (!gid_eq(c1->fsgid, c2->fsgid)) + return false; + if (c1->group_info == NULL || c2->group_info == NULL) + return c1->group_info == c2->group_info; + if (c1->group_info->ngroups != c2->group_info->ngroups) + return false; + for (i = 0; i < c1->group_info->ngroups; i++) { + if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i])) + return false; + } + return true; +} + +/** + * nfsd_file_obj_cmpfn - Match a cache item against search criteria + * @arg: search criteria + * @ptr: cache item to check + * + * Return values: + * %0 - Item matches search criteria + * %1 - Item does not match search criteria + */ +static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg, + const void *ptr) +{ + const struct nfsd_file_lookup_key *key = arg->key; + const struct nfsd_file *nf = ptr; + + switch (key->type) { + case NFSD_FILE_KEY_INODE: + if (nf->nf_inode != key->inode) + return 1; + break; + case NFSD_FILE_KEY_FULL: + if (nf->nf_inode != key->inode) + return 1; + if (nf->nf_may != key->need) + return 1; + if (nf->nf_net != key->net) + return 1; + if (!nfsd_match_cred(nf->nf_cred, key->cred)) + return 1; + if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) + return 1; + break; + } + return 0; +} + +static const struct rhashtable_params nfsd_file_rhash_params = { + .key_len = sizeof_field(struct nfsd_file, nf_inode), + .key_offset = offsetof(struct nfsd_file, nf_inode), + .head_offset = offsetof(struct nfsd_file, nf_rhash), + .hashfn = nfsd_file_key_hashfn, + .obj_hashfn = nfsd_file_obj_hashfn, + .obj_cmpfn = nfsd_file_obj_cmpfn, + /* Reduce resizing churn on light workloads */ + .min_size = 512, /* buckets */ + .automatic_shrinking = true, +}; static void nfsd_file_schedule_laundrette(void) { - long count = atomic_long_read(&nfsd_filecache_count); - - if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags)) + if ((atomic_read(&nfsd_file_rhash_tbl.nelems) == 0) || + test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0) return; queue_delayed_work(system_wq, &nfsd_filecache_laundrette, @@ -115,22 +228,21 @@ nfsd_file_mark_put(struct nfsd_file_mark *nfm) } static struct nfsd_file_mark * -nfsd_file_mark_find_or_create(struct nfsd_file *nf) +nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode) { int err; struct fsnotify_mark *mark; struct nfsd_file_mark *nfm = NULL, *new; - struct inode *inode = nf->nf_inode; do { - mutex_lock(&nfsd_file_fsnotify_group->mark_mutex); + fsnotify_group_lock(nfsd_file_fsnotify_group); mark = fsnotify_find_mark(&inode->i_fsnotify_marks, - nfsd_file_fsnotify_group); + nfsd_file_fsnotify_group); if (mark) { nfm = nfsd_file_mark_get(container_of(mark, struct nfsd_file_mark, nfm_mark)); - mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex); + fsnotify_group_unlock(nfsd_file_fsnotify_group); if (nfm) { fsnotify_put_mark(mark); break; @@ -138,8 +250,9 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf) /* Avoid soft lockup race with nfsd_file_mark_put() */ fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group); fsnotify_put_mark(mark); - } else - mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex); + } else { + fsnotify_group_unlock(nfsd_file_fsnotify_group); + } /* allocate a new nfm */ new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL); @@ -170,32 +283,25 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf) } static struct nfsd_file * -nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval, - struct net *net) +nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may) { struct nfsd_file *nf; nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL); if (nf) { - INIT_HLIST_NODE(&nf->nf_node); INIT_LIST_HEAD(&nf->nf_lru); + nf->nf_birthtime = ktime_get(); nf->nf_file = NULL; nf->nf_cred = get_current_cred(); - nf->nf_net = net; + nf->nf_net = key->net; nf->nf_flags = 0; - nf->nf_inode = inode; - nf->nf_hashval = hashval; - refcount_set(&nf->nf_ref, 1); - nf->nf_may = may & NFSD_FILE_MAY_MASK; - if (may & NFSD_MAY_NOT_BREAK_LEASE) { - if (may & NFSD_MAY_WRITE) - __set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags); - if (may & NFSD_MAY_READ) - __set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags); - } + __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); + __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); + nf->nf_inode = key->inode; + /* nf_ref is pre-incremented for hash table */ + refcount_set(&nf->nf_ref, 2); + nf->nf_may = key->need; nf->nf_mark = NULL; - init_rwsem(&nf->nf_rwsem); - trace_nfsd_file_alloc(nf); } return nf; } @@ -203,8 +309,12 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval, static bool nfsd_file_free(struct nfsd_file *nf) { + s64 age = ktime_to_ms(ktime_sub(ktime_get(), nf->nf_birthtime)); bool flush = false; + this_cpu_inc(nfsd_file_releases); + this_cpu_add(nfsd_file_total_age, age); + trace_nfsd_file_put_final(nf); if (nf->nf_mark) nfsd_file_mark_put(nf->nf_mark); @@ -214,6 +324,14 @@ nfsd_file_free(struct nfsd_file *nf) fput(nf->nf_file); flush = true; } + + /* + * If this item is still linked via nf_lru, that's a bug. + * WARN and leak it to preserve system stability. + */ + if (WARN_ON_ONCE(!list_empty(&nf->nf_lru))) + return flush; + call_rcu(&nf->nf_rcu, nfsd_file_slab_free); return flush; } @@ -242,48 +360,60 @@ nfsd_file_check_write_error(struct nfsd_file *nf) } static void -nfsd_file_do_unhash(struct nfsd_file *nf) +nfsd_file_flush(struct nfsd_file *nf) { - lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); + struct file *file = nf->nf_file; + + if (!file || !(file->f_mode & FMODE_WRITE)) + return; + this_cpu_add(nfsd_file_pages_flushed, file->f_mapping->nrpages); + if (vfs_fsync(file, 1) != 0) + nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); +} +static void nfsd_file_lru_add(struct nfsd_file *nf) +{ + set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); + if (list_lru_add(&nfsd_file_lru, &nf->nf_lru)) + trace_nfsd_file_lru_add(nf); +} + +static void nfsd_file_lru_remove(struct nfsd_file *nf) +{ + if (list_lru_del(&nfsd_file_lru, &nf->nf_lru)) + trace_nfsd_file_lru_del(nf); +} + +static void +nfsd_file_hash_remove(struct nfsd_file *nf) +{ trace_nfsd_file_unhash(nf); if (nfsd_file_check_write_error(nf)) - nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id)); - --nfsd_file_hashtbl[nf->nf_hashval].nfb_count; - hlist_del_rcu(&nf->nf_node); - atomic_long_dec(&nfsd_filecache_count); + nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); + rhashtable_remove_fast(&nfsd_file_rhash_tbl, &nf->nf_rhash, + nfsd_file_rhash_params); } static bool nfsd_file_unhash(struct nfsd_file *nf) { if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - nfsd_file_do_unhash(nf); - if (!list_empty(&nf->nf_lru)) - list_lru_del(&nfsd_file_lru, &nf->nf_lru); + nfsd_file_hash_remove(nf); return true; } return false; } -/* - * Return true if the file was unhashed. - */ -static bool -nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose) +static void +nfsd_file_unhash_and_dispose(struct nfsd_file *nf, struct list_head *dispose) { - lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); - - trace_nfsd_file_unhash_and_release_locked(nf); - if (!nfsd_file_unhash(nf)) - return false; - /* keep final reference for nfsd_file_lru_dispose */ - if (refcount_dec_not_one(&nf->nf_ref)) - return true; - - list_add(&nf->nf_lru, dispose); - return true; + trace_nfsd_file_unhash_and_dispose(nf); + if (nfsd_file_unhash(nf)) { + /* caller must call nfsd_file_dispose_list() later */ + nfsd_file_lru_remove(nf); + list_add(&nf->nf_lru, dispose); + } } static void @@ -293,6 +423,7 @@ nfsd_file_put_noref(struct nfsd_file *nf) if (refcount_dec_and_test(&nf->nf_ref)) { WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags)); + nfsd_file_lru_remove(nf); nfsd_file_free(nf); } } @@ -300,21 +431,35 @@ nfsd_file_put_noref(struct nfsd_file *nf) void nfsd_file_put(struct nfsd_file *nf) { - bool is_hashed; + might_sleep(); - set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); - if (refcount_read(&nf->nf_ref) > 2 || !nf->nf_file) { + nfsd_file_lru_add(nf); + if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) { + nfsd_file_flush(nf); + nfsd_file_put_noref(nf); + } else if (nf->nf_file) { nfsd_file_put_noref(nf); - return; - } - - filemap_flush(nf->nf_file->f_mapping); - is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0; - nfsd_file_put_noref(nf); - if (is_hashed) nfsd_file_schedule_laundrette(); - if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT) - nfsd_file_gc(); + } else + nfsd_file_put_noref(nf); +} + +/** + * nfsd_file_close - Close an nfsd_file + * @nf: nfsd_file to close + * + * If this is the final reference for @nf, free it immediately. + * This reflects an on-the-wire CLOSE or DELEGRETURN into the + * VFS and exported filesystem. + */ +void nfsd_file_close(struct nfsd_file *nf) +{ + nfsd_file_put(nf); + if (refcount_dec_if_one(&nf->nf_ref)) { + nfsd_file_unhash(nf); + nfsd_file_lru_remove(nf); + nfsd_file_free(nf); + } } struct nfsd_file * @@ -332,7 +477,8 @@ nfsd_file_dispose_list(struct list_head *dispose) while(!list_empty(dispose)) { nf = list_first_entry(dispose, struct nfsd_file, nf_lru); - list_del(&nf->nf_lru); + list_del_init(&nf->nf_lru); + nfsd_file_flush(nf); nfsd_file_put_noref(nf); } } @@ -345,7 +491,8 @@ nfsd_file_dispose_list_sync(struct list_head *dispose) while(!list_empty(dispose)) { nf = list_first_entry(dispose, struct nfsd_file, nf_lru); - list_del(&nf->nf_lru); + list_del_init(&nf->nf_lru); + nfsd_file_flush(nf); if (!refcount_dec_and_test(&nf->nf_ref)) continue; if (nfsd_file_free(nf)) @@ -367,19 +514,13 @@ nfsd_file_list_remove_disposal(struct list_head *dst, static void nfsd_file_list_add_disposal(struct list_head *files, struct net *net) { - struct nfsd_fcache_disposal *l; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct nfsd_fcache_disposal *l = nn->fcache_disposal; - rcu_read_lock(); - list_for_each_entry_rcu(l, &laundrettes, list) { - if (l->net == net) { - spin_lock(&l->lock); - list_splice_tail_init(files, &l->freeme); - spin_unlock(&l->lock); - queue_work(nfsd_filecache_wq, &l->work); - break; - } - } - rcu_read_unlock(); + spin_lock(&l->lock); + list_splice_tail_init(files, &l->freeme); + spin_unlock(&l->lock); + queue_work(nfsd_filecache_wq, &l->work); } static void @@ -407,8 +548,17 @@ nfsd_file_dispose_list_delayed(struct list_head *dispose) } } -/* - * Note this can deadlock with nfsd_file_cache_purge. +/** + * nfsd_file_lru_cb - Examine an entry on the LRU list + * @item: LRU entry to examine + * @lru: controlling LRU + * @lock: LRU list lock (unused) + * @arg: dispose list + * + * Return values: + * %LRU_REMOVED: @item was removed from the LRU + * %LRU_ROTATE: @item is to be moved to the LRU tail + * %LRU_SKIP: @item cannot be evicted */ static enum lru_status nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, @@ -429,55 +579,65 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, * counter. Here we check the counter and then test and clear the flag. * That order is deliberate to ensure that we can do this locklessly. */ - if (refcount_read(&nf->nf_ref) > 1) - goto out_skip; + if (refcount_read(&nf->nf_ref) > 1) { + list_lru_isolate(lru, &nf->nf_lru); + trace_nfsd_file_gc_in_use(nf); + return LRU_REMOVED; + } /* * Don't throw out files that are still undergoing I/O or * that have uncleared errors pending. */ - if (nfsd_file_check_writeback(nf)) - goto out_skip; + if (nfsd_file_check_writeback(nf)) { + trace_nfsd_file_gc_writeback(nf); + return LRU_SKIP; + } - if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) - goto out_skip; + if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) { + trace_nfsd_file_gc_referenced(nf); + return LRU_ROTATE; + } - if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) - goto out_skip; + if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { + trace_nfsd_file_gc_hashed(nf); + return LRU_SKIP; + } list_lru_isolate_move(lru, &nf->nf_lru, head); + this_cpu_inc(nfsd_file_evictions); + trace_nfsd_file_gc_disposed(nf); return LRU_REMOVED; -out_skip: - return LRU_SKIP; } -static unsigned long -nfsd_file_lru_walk_list(struct shrink_control *sc) +/* + * Unhash items on @dispose immediately, then queue them on the + * disposal workqueue to finish releasing them in the background. + * + * cel: Note that between the time list_lru_shrink_walk runs and + * now, these items are in the hash table but marked unhashed. + * Why release these outside of lru_cb ? There's no lock ordering + * problem since lru_cb currently takes no lock. + */ +static void nfsd_file_gc_dispose_list(struct list_head *dispose) { - LIST_HEAD(head); struct nfsd_file *nf; - unsigned long ret; - if (sc) - ret = list_lru_shrink_walk(&nfsd_file_lru, sc, - nfsd_file_lru_cb, &head); - else - ret = list_lru_walk(&nfsd_file_lru, - nfsd_file_lru_cb, - &head, LONG_MAX); - list_for_each_entry(nf, &head, nf_lru) { - spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); - nfsd_file_do_unhash(nf); - spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); - } - nfsd_file_dispose_list_delayed(&head); - return ret; + list_for_each_entry(nf, dispose, nf_lru) + nfsd_file_hash_remove(nf); + nfsd_file_dispose_list_delayed(dispose); } static void nfsd_file_gc(void) { - nfsd_file_lru_walk_list(NULL); + LIST_HEAD(dispose); + unsigned long ret; + + ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, + &dispose, list_lru_count(&nfsd_file_lru)); + trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru)); + nfsd_file_gc_dispose_list(&dispose); } static void @@ -496,7 +656,14 @@ nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc) static unsigned long nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc) { - return nfsd_file_lru_walk_list(sc); + LIST_HEAD(dispose); + unsigned long ret; + + ret = list_lru_shrink_walk(&nfsd_file_lru, sc, + nfsd_file_lru_cb, &dispose); + trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru)); + nfsd_file_gc_dispose_list(&dispose); + return ret; } static struct shrinker nfsd_file_shrinker = { @@ -505,59 +672,64 @@ static struct shrinker nfsd_file_shrinker = { .seeks = 1, }; -static void -__nfsd_file_close_inode(struct inode *inode, unsigned int hashval, - struct list_head *dispose) +/* + * Find all cache items across all net namespaces that match @inode and + * move them to @dispose. The lookup is atomic wrt nfsd_file_acquire(). + */ +static unsigned int +__nfsd_file_close_inode(struct inode *inode, struct list_head *dispose) { - struct nfsd_file *nf; - struct hlist_node *tmp; + struct nfsd_file_lookup_key key = { + .type = NFSD_FILE_KEY_INODE, + .inode = inode, + }; + unsigned int count = 0; + struct nfsd_file *nf; - spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); - hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) { - if (inode == nf->nf_inode) - nfsd_file_unhash_and_release_locked(nf, dispose); - } - spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); + rcu_read_lock(); + do { + nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key, + nfsd_file_rhash_params); + if (!nf) + break; + nfsd_file_unhash_and_dispose(nf, dispose); + count++; + } while (1); + rcu_read_unlock(); + return count; } /** * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file * @inode: inode of the file to attempt to remove * - * Walk the whole hash bucket, looking for any files that correspond to "inode". - * If any do, then unhash them and put the hashtable reference to them and - * destroy any that had their last reference put. Also ensure that any of the - * fputs also have their final __fput done as well. + * Unhash and put, then flush and fput all cache items associated with @inode. */ void nfsd_file_close_inode_sync(struct inode *inode) { - unsigned int hashval = (unsigned int)hash_long(inode->i_ino, - NFSD_FILE_HASH_BITS); LIST_HEAD(dispose); + unsigned int count; - __nfsd_file_close_inode(inode, hashval, &dispose); - trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose)); + count = __nfsd_file_close_inode(inode, &dispose); + trace_nfsd_file_close_inode_sync(inode, count); nfsd_file_dispose_list_sync(&dispose); } /** - * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file + * nfsd_file_close_inode - attempt a delayed close of a nfsd_file * @inode: inode of the file to attempt to remove * - * Walk the whole hash bucket, looking for any files that correspond to "inode". - * If any do, then unhash them and put the hashtable reference to them and - * destroy any that had their last reference put. + * Unhash and put all cache item associated with @inode. */ static void nfsd_file_close_inode(struct inode *inode) { - unsigned int hashval = (unsigned int)hash_long(inode->i_ino, - NFSD_FILE_HASH_BITS); LIST_HEAD(dispose); + unsigned int count; - __nfsd_file_close_inode(inode, hashval, &dispose); - trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose)); + count = __nfsd_file_close_inode(inode, &dispose); + trace_nfsd_file_close_inode(inode, count); nfsd_file_dispose_list_delayed(&dispose); } @@ -567,8 +739,6 @@ nfsd_file_close_inode(struct inode *inode) * * Walk the LRU list and close any entries that have not been used since * the last scan. - * - * Note this can deadlock with nfsd_file_cache_purge. */ static void nfsd_file_delayed_close(struct work_struct *work) @@ -598,12 +768,13 @@ static struct notifier_block nfsd_file_lease_notifier = { }; static int -nfsd_file_fsnotify_handle_event(struct fsnotify_group *group, - struct inode *inode, - u32 mask, const void *data, int data_type, - const struct qstr *file_name, u32 cookie, - struct fsnotify_iter_info *iter_info) +nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask, + struct inode *inode, struct inode *dir, + const struct qstr *name, u32 cookie) { + if (WARN_ON_ONCE(!inode)) + return 0; + trace_nfsd_file_fsnotify_handle_event(inode, mask); /* Should be no marks on non-regular files */ @@ -624,32 +795,28 @@ nfsd_file_fsnotify_handle_event(struct fsnotify_group *group, static const struct fsnotify_ops nfsd_file_fsnotify_ops = { - .handle_event = nfsd_file_fsnotify_handle_event, + .handle_inode_event = nfsd_file_fsnotify_handle_event, .free_mark = nfsd_file_mark_free, }; int nfsd_file_cache_init(void) { - int ret = -ENOMEM; - unsigned int i; + int ret; - clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags); - - if (nfsd_file_hashtbl) + lockdep_assert_held(&nfsd_mutex); + if (test_and_set_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) return 0; + ret = rhashtable_init(&nfsd_file_rhash_tbl, &nfsd_file_rhash_params); + if (ret) + return ret; + + ret = -ENOMEM; nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0); if (!nfsd_filecache_wq) goto out; - nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE, - sizeof(*nfsd_file_hashtbl), GFP_KERNEL); - if (!nfsd_file_hashtbl) { - pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n"); - goto out_err; - } - nfsd_file_slab = kmem_cache_create("nfsd_file", sizeof(struct nfsd_file), 0, 0, NULL); if (!nfsd_file_slab) { @@ -671,7 +838,7 @@ nfsd_file_cache_init(void) goto out_err; } - ret = register_shrinker(&nfsd_file_shrinker); + ret = register_shrinker(&nfsd_file_shrinker, "nfsd-filecache"); if (ret) { pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret); goto out_lru; @@ -683,19 +850,16 @@ nfsd_file_cache_init(void) goto out_shrinker; } - nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops); + nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops, + FSNOTIFY_GROUP_NOFS); if (IS_ERR(nfsd_file_fsnotify_group)) { pr_err("nfsd: unable to create fsnotify group: %ld\n", PTR_ERR(nfsd_file_fsnotify_group)); + ret = PTR_ERR(nfsd_file_fsnotify_group); nfsd_file_fsnotify_group = NULL; goto out_notifier; } - for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { - INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head); - spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock); - } - INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker); out: return ret; @@ -710,50 +874,39 @@ out_err: nfsd_file_slab = NULL; kmem_cache_destroy(nfsd_file_mark_slab); nfsd_file_mark_slab = NULL; - kfree(nfsd_file_hashtbl); - nfsd_file_hashtbl = NULL; destroy_workqueue(nfsd_filecache_wq); nfsd_filecache_wq = NULL; + rhashtable_destroy(&nfsd_file_rhash_tbl); goto out; } -/* - * Note this can deadlock with nfsd_file_lru_cb. - */ -void -nfsd_file_cache_purge(struct net *net) +static void +__nfsd_file_cache_purge(struct net *net) { - unsigned int i; - struct nfsd_file *nf; - struct hlist_node *next; + struct rhashtable_iter iter; + struct nfsd_file *nf; LIST_HEAD(dispose); - bool del; - if (!nfsd_file_hashtbl) - return; + rhashtable_walk_enter(&nfsd_file_rhash_tbl, &iter); + do { + rhashtable_walk_start(&iter); - for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { - struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i]; + nf = rhashtable_walk_next(&iter); + while (!IS_ERR_OR_NULL(nf)) { + if (!net || nf->nf_net == net) + nfsd_file_unhash_and_dispose(nf, &dispose); + nf = rhashtable_walk_next(&iter); + } - spin_lock(&nfb->nfb_lock); - hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) { - if (net && nf->nf_net != net) - continue; - del = nfsd_file_unhash_and_release_locked(nf, &dispose); + rhashtable_walk_stop(&iter); + } while (nf == ERR_PTR(-EAGAIN)); + rhashtable_walk_exit(&iter); - /* - * Deadlock detected! Something marked this entry as - * unhased, but hasn't removed it from the hash list. - */ - WARN_ON_ONCE(!del); - } - spin_unlock(&nfb->nfb_lock); - nfsd_file_dispose_list(&dispose); - } + nfsd_file_dispose_list(&dispose); } static struct nfsd_fcache_disposal * -nfsd_alloc_fcache_disposal(struct net *net) +nfsd_alloc_fcache_disposal(void) { struct nfsd_fcache_disposal *l; @@ -761,7 +914,6 @@ nfsd_alloc_fcache_disposal(struct net *net) if (!l) return NULL; INIT_WORK(&l->work, nfsd_file_delayed_close); - l->net = net; spin_lock_init(&l->lock); INIT_LIST_HEAD(&l->freeme); return l; @@ -770,61 +922,40 @@ nfsd_alloc_fcache_disposal(struct net *net) static void nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l) { - rcu_assign_pointer(l->net, NULL); cancel_work_sync(&l->work); nfsd_file_dispose_list(&l->freeme); - kfree_rcu(l, rcu); + kfree(l); } static void -nfsd_add_fcache_disposal(struct nfsd_fcache_disposal *l) +nfsd_free_fcache_disposal_net(struct net *net) { - spin_lock(&laundrette_lock); - list_add_tail_rcu(&l->list, &laundrettes); - spin_unlock(&laundrette_lock); -} + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct nfsd_fcache_disposal *l = nn->fcache_disposal; -static void -nfsd_del_fcache_disposal(struct nfsd_fcache_disposal *l) -{ - spin_lock(&laundrette_lock); - list_del_rcu(&l->list); - spin_unlock(&laundrette_lock); + nfsd_free_fcache_disposal(l); } -static int -nfsd_alloc_fcache_disposal_net(struct net *net) -{ - struct nfsd_fcache_disposal *l; - - l = nfsd_alloc_fcache_disposal(net); - if (!l) - return -ENOMEM; - nfsd_add_fcache_disposal(l); - return 0; -} - -static void -nfsd_free_fcache_disposal_net(struct net *net) +int +nfsd_file_cache_start_net(struct net *net) { - struct nfsd_fcache_disposal *l; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); - rcu_read_lock(); - list_for_each_entry_rcu(l, &laundrettes, list) { - if (l->net != net) - continue; - nfsd_del_fcache_disposal(l); - rcu_read_unlock(); - nfsd_free_fcache_disposal(l); - return; - } - rcu_read_unlock(); + nn->fcache_disposal = nfsd_alloc_fcache_disposal(); + return nn->fcache_disposal ? 0 : -ENOMEM; } -int -nfsd_file_cache_start_net(struct net *net) +/** + * nfsd_file_cache_purge - Remove all cache items associated with @net + * @net: target net namespace + * + */ +void +nfsd_file_cache_purge(struct net *net) { - return nfsd_alloc_fcache_disposal_net(net); + lockdep_assert_held(&nfsd_mutex); + if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) + __nfsd_file_cache_purge(net); } void @@ -837,7 +968,11 @@ nfsd_file_cache_shutdown_net(struct net *net) void nfsd_file_cache_shutdown(void) { - set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags); + int i; + + lockdep_assert_held(&nfsd_mutex); + if (test_and_clear_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 0) + return; lease_unregister_notifier(&nfsd_file_lease_notifier); unregister_shrinker(&nfsd_file_shrinker); @@ -846,7 +981,7 @@ nfsd_file_cache_shutdown(void) * calling nfsd_file_cache_purge */ cancel_delayed_work_sync(&nfsd_filecache_laundrette); - nfsd_file_cache_purge(NULL); + __nfsd_file_cache_purge(NULL); list_lru_destroy(&nfsd_file_lru); rcu_barrier(); fsnotify_put_group(nfsd_file_fsnotify_group); @@ -856,238 +991,241 @@ nfsd_file_cache_shutdown(void) fsnotify_wait_marks_destroyed(); kmem_cache_destroy(nfsd_file_mark_slab); nfsd_file_mark_slab = NULL; - kfree(nfsd_file_hashtbl); - nfsd_file_hashtbl = NULL; destroy_workqueue(nfsd_filecache_wq); nfsd_filecache_wq = NULL; -} - -static bool -nfsd_match_cred(const struct cred *c1, const struct cred *c2) -{ - int i; - - if (!uid_eq(c1->fsuid, c2->fsuid)) - return false; - if (!gid_eq(c1->fsgid, c2->fsgid)) - return false; - if (c1->group_info == NULL || c2->group_info == NULL) - return c1->group_info == c2->group_info; - if (c1->group_info->ngroups != c2->group_info->ngroups) - return false; - for (i = 0; i < c1->group_info->ngroups; i++) { - if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i])) - return false; - } - return true; -} - -static struct nfsd_file * -nfsd_file_find_locked(struct inode *inode, unsigned int may_flags, - unsigned int hashval, struct net *net) -{ - struct nfsd_file *nf; - unsigned char need = may_flags & NFSD_FILE_MAY_MASK; - - hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head, - nf_node) { - if ((need & nf->nf_may) != need) - continue; - if (nf->nf_inode != inode) - continue; - if (nf->nf_net != net) - continue; - if (!nfsd_match_cred(nf->nf_cred, current_cred())) - continue; - if (nfsd_file_get(nf) != NULL) - return nf; + rhashtable_destroy(&nfsd_file_rhash_tbl); + + for_each_possible_cpu(i) { + per_cpu(nfsd_file_cache_hits, i) = 0; + per_cpu(nfsd_file_acquisitions, i) = 0; + per_cpu(nfsd_file_releases, i) = 0; + per_cpu(nfsd_file_total_age, i) = 0; + per_cpu(nfsd_file_pages_flushed, i) = 0; + per_cpu(nfsd_file_evictions, i) = 0; } - return NULL; } /** - * nfsd_file_is_cached - are there any cached open files for this fh? - * @inode: inode of the file to check + * nfsd_file_is_cached - are there any cached open files for this inode? + * @inode: inode to check * - * Scan the hashtable for open files that match this fh. Returns true if there - * are any, and false if not. + * The lookup matches inodes in all net namespaces and is atomic wrt + * nfsd_file_acquire(). + * + * Return values: + * %true: filecache contains at least one file matching this inode + * %false: filecache contains no files matching this inode */ bool nfsd_file_is_cached(struct inode *inode) { - bool ret = false; - struct nfsd_file *nf; - unsigned int hashval; - - hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS); - - rcu_read_lock(); - hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head, - nf_node) { - if (inode == nf->nf_inode) { - ret = true; - break; - } - } - rcu_read_unlock(); - trace_nfsd_file_is_cached(inode, hashval, (int)ret); + struct nfsd_file_lookup_key key = { + .type = NFSD_FILE_KEY_INODE, + .inode = inode, + }; + bool ret = false; + + if (rhashtable_lookup_fast(&nfsd_file_rhash_tbl, &key, + nfsd_file_rhash_params) != NULL) + ret = true; + trace_nfsd_file_is_cached(inode, (int)ret); return ret; } -__be32 -nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, - unsigned int may_flags, struct nfsd_file **pnf) +static __be32 +nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, + unsigned int may_flags, struct nfsd_file **pnf, bool open) { - __be32 status; - struct net *net = SVC_NET(rqstp); - struct nfsd_file *nf, *new; - struct inode *inode; - unsigned int hashval; - bool retry = true; - - /* FIXME: skip this if fh_dentry is already set? */ + struct nfsd_file_lookup_key key = { + .type = NFSD_FILE_KEY_FULL, + .need = may_flags & NFSD_FILE_MAY_MASK, + .net = SVC_NET(rqstp), + }; + bool open_retry = true; + struct nfsd_file *nf; + __be32 status; + int ret; + status = fh_verify(rqstp, fhp, S_IFREG, may_flags|NFSD_MAY_OWNER_OVERRIDE); if (status != nfs_ok) return status; + key.inode = d_inode(fhp->fh_dentry); + key.cred = get_current_cred(); - inode = d_inode(fhp->fh_dentry); - hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS); retry: rcu_read_lock(); - nf = nfsd_file_find_locked(inode, may_flags, hashval, net); + nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key, + nfsd_file_rhash_params); + if (nf) + nf = nfsd_file_get(nf); rcu_read_unlock(); if (nf) goto wait_for_construction; - new = nfsd_file_alloc(inode, may_flags, hashval, net); - if (!new) { - trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, - NULL, nfserr_jukebox); - return nfserr_jukebox; + nf = nfsd_file_alloc(&key, may_flags); + if (!nf) { + status = nfserr_jukebox; + goto out_status; } - spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); - nf = nfsd_file_find_locked(inode, may_flags, hashval, net); - if (nf == NULL) + ret = rhashtable_lookup_insert_key(&nfsd_file_rhash_tbl, + &key, &nf->nf_rhash, + nfsd_file_rhash_params); + if (likely(ret == 0)) goto open_file; - spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); - nfsd_file_slab_free(&new->nf_rcu); + + nfsd_file_slab_free(&nf->nf_rcu); + nf = NULL; + if (ret == -EEXIST) + goto retry; + trace_nfsd_file_insert_err(rqstp, key.inode, may_flags, ret); + status = nfserr_jukebox; + goto out_status; wait_for_construction: wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE); /* Did construction of this file fail? */ if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - if (!retry) { + trace_nfsd_file_cons_err(rqstp, key.inode, may_flags, nf); + if (!open_retry) { status = nfserr_jukebox; goto out; } - retry = false; + open_retry = false; nfsd_file_put_noref(nf); goto retry; } + nfsd_file_lru_remove(nf); this_cpu_inc(nfsd_file_cache_hits); - if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) { - bool write = (may_flags & NFSD_MAY_WRITE); - - if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) || - (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) { - status = nfserrno(nfsd_open_break_lease( - file_inode(nf->nf_file), may_flags)); - if (status == nfs_ok) { - clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags); - if (write) - clear_bit(NFSD_FILE_BREAK_WRITE, - &nf->nf_flags); - } - } - } + status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags)); out: if (status == nfs_ok) { + if (open) + this_cpu_inc(nfsd_file_acquisitions); *pnf = nf; } else { nfsd_file_put(nf); nf = NULL; } - trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status); +out_status: + put_cred(key.cred); + if (open) + trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status); return status; + open_file: - nf = new; - /* Take reference for the hashtable */ - refcount_inc(&nf->nf_ref); - __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); - __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); - list_lru_add(&nfsd_file_lru, &nf->nf_lru); - hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head); - ++nfsd_file_hashtbl[hashval].nfb_count; - nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount, - nfsd_file_hashtbl[hashval].nfb_count); - spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); - if (atomic_long_inc_return(&nfsd_filecache_count) >= NFSD_FILE_LRU_THRESHOLD) - nfsd_file_gc(); - - nf->nf_mark = nfsd_file_mark_find_or_create(nf); - if (nf->nf_mark) - status = nfsd_open_verified(rqstp, fhp, S_IFREG, - may_flags, &nf->nf_file); - else + trace_nfsd_file_alloc(nf); + nf->nf_mark = nfsd_file_mark_find_or_create(nf, key.inode); + if (nf->nf_mark) { + if (open) { + status = nfsd_open_verified(rqstp, fhp, may_flags, + &nf->nf_file); + trace_nfsd_file_open(nf, status); + } else + status = nfs_ok; + } else status = nfserr_jukebox; /* * If construction failed, or we raced with a call to unlink() * then unhash. */ - if (status != nfs_ok || inode->i_nlink == 0) { - bool do_free; - spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); - do_free = nfsd_file_unhash(nf); - spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); - if (do_free) + if (status != nfs_ok || key.inode->i_nlink == 0) + if (nfsd_file_unhash(nf)) nfsd_file_put_noref(nf); - } clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags); smp_mb__after_atomic(); wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING); goto out; } +/** + * nfsd_file_acquire - Get a struct nfsd_file with an open file + * @rqstp: the RPC transaction being executed + * @fhp: the NFS filehandle of the file to be opened + * @may_flags: NFSD_MAY_ settings for the file + * @pnf: OUT: new or found "struct nfsd_file" object + * + * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in + * network byte order is returned. + */ +__be32 +nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, + unsigned int may_flags, struct nfsd_file **pnf) +{ + return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true); +} + +/** + * nfsd_file_create - Get a struct nfsd_file, do not open + * @rqstp: the RPC transaction being executed + * @fhp: the NFS filehandle of the file just created + * @may_flags: NFSD_MAY_ settings for the file + * @pnf: OUT: new or found "struct nfsd_file" object + * + * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in + * network byte order is returned. + */ +__be32 +nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, + unsigned int may_flags, struct nfsd_file **pnf) +{ + return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, false); +} + /* * Note that fields may be added, removed or reordered in the future. Programs * scraping this file for info should test the labels to ensure they're * getting the correct field. */ -static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) +int nfsd_file_cache_stats_show(struct seq_file *m, void *v) { - unsigned int i, count = 0, longest = 0; - unsigned long hits = 0; + unsigned long releases = 0, pages_flushed = 0, evictions = 0; + unsigned long hits = 0, acquisitions = 0; + unsigned int i, count = 0, buckets = 0; + unsigned long lru = 0, total_age = 0; - /* - * No need for spinlocks here since we're not terribly interested in - * accuracy. We do take the nfsd_mutex simply to ensure that we - * don't end up racing with server shutdown - */ + /* Serialize with server shutdown */ mutex_lock(&nfsd_mutex); - if (nfsd_file_hashtbl) { - for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { - count += nfsd_file_hashtbl[i].nfb_count; - longest = max(longest, nfsd_file_hashtbl[i].nfb_count); - } + if (test_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1) { + struct bucket_table *tbl; + struct rhashtable *ht; + + lru = list_lru_count(&nfsd_file_lru); + + rcu_read_lock(); + ht = &nfsd_file_rhash_tbl; + count = atomic_read(&ht->nelems); + tbl = rht_dereference_rcu(ht->tbl, ht); + buckets = tbl->size; + rcu_read_unlock(); } mutex_unlock(&nfsd_mutex); - for_each_possible_cpu(i) + for_each_possible_cpu(i) { hits += per_cpu(nfsd_file_cache_hits, i); + acquisitions += per_cpu(nfsd_file_acquisitions, i); + releases += per_cpu(nfsd_file_releases, i); + total_age += per_cpu(nfsd_file_total_age, i); + evictions += per_cpu(nfsd_file_evictions, i); + pages_flushed += per_cpu(nfsd_file_pages_flushed, i); + } seq_printf(m, "total entries: %u\n", count); - seq_printf(m, "longest chain: %u\n", longest); + seq_printf(m, "hash buckets: %u\n", buckets); + seq_printf(m, "lru entries: %lu\n", lru); seq_printf(m, "cache hits: %lu\n", hits); + seq_printf(m, "acquisitions: %lu\n", acquisitions); + seq_printf(m, "releases: %lu\n", releases); + seq_printf(m, "evictions: %lu\n", evictions); + if (releases) + seq_printf(m, "mean age (ms): %ld\n", total_age / releases); + else + seq_printf(m, "mean age (ms): -\n"); + seq_printf(m, "pages flushed: %lu\n", pages_flushed); return 0; } - -int nfsd_file_cache_stats_open(struct inode *inode, struct file *file) -{ - return single_open(file, nfsd_file_cache_stats_show, NULL); -} diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index 7872df5a0fe3..357832bac736 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -29,7 +29,7 @@ struct nfsd_file_mark { * never be dereferenced, only used for comparison. */ struct nfsd_file { - struct hlist_node nf_node; + struct rhash_head nf_rhash; struct list_head nf_lru; struct rcu_head nf_rcu; struct file *nf_file; @@ -37,16 +37,13 @@ struct nfsd_file { struct net *nf_net; #define NFSD_FILE_HASHED (0) #define NFSD_FILE_PENDING (1) -#define NFSD_FILE_BREAK_READ (2) -#define NFSD_FILE_BREAK_WRITE (3) -#define NFSD_FILE_REFERENCED (4) +#define NFSD_FILE_REFERENCED (2) unsigned long nf_flags; - struct inode *nf_inode; - unsigned int nf_hashval; + struct inode *nf_inode; /* don't deref */ refcount_t nf_ref; unsigned char nf_may; struct nfsd_file_mark *nf_mark; - struct rw_semaphore nf_rwsem; + ktime_t nf_birthtime; }; int nfsd_file_cache_init(void); @@ -55,10 +52,13 @@ void nfsd_file_cache_shutdown(void); int nfsd_file_cache_start_net(struct net *net); void nfsd_file_cache_shutdown_net(struct net *net); void nfsd_file_put(struct nfsd_file *nf); +void nfsd_file_close(struct nfsd_file *nf); struct nfsd_file *nfsd_file_get(struct nfsd_file *nf); void nfsd_file_close_inode_sync(struct inode *inode); bool nfsd_file_is_cached(struct inode *inode); __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **nfp); -int nfsd_file_cache_stats_open(struct inode *, struct file *); +__be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, + unsigned int may_flags, struct nfsd_file **nfp); +int nfsd_file_cache_stats_show(struct seq_file *m, void *v); #endif /* _FS_NFSD_FILECACHE_H */ diff --git a/fs/nfsd/flexfilelayout.c b/fs/nfsd/flexfilelayout.c index db7ef07ae50c..070f90ed09b6 100644 --- a/fs/nfsd/flexfilelayout.c +++ b/fs/nfsd/flexfilelayout.c @@ -61,7 +61,7 @@ nfsd4_ff_proc_layoutget(struct inode *inode, const struct svc_fh *fhp, goto out_error; fl->fh.size = fhp->fh_handle.fh_size; - memcpy(fl->fh.data, &fhp->fh_handle.fh_base, fl->fh.size); + memcpy(fl->fh.data, &fhp->fh_handle.fh_raw, fl->fh.size); /* Give whole file layout segments */ seg->offset = 0; @@ -117,7 +117,7 @@ nfsd4_ff_proc_getdeviceinfo(struct super_block *sb, struct svc_rqst *rqstp, da->netaddr.addr_len = snprintf(da->netaddr.addr, FF_ADDR_LEN + 1, - "%s.%hhu.%hhu", addr, port >> 8, port & 0xff); + "%s.%d.%d", addr, port >> 8, port & 0xff); da->tightly_coupled = false; diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c index 3f5b3d7b62b7..46a7f9b813e5 100644 --- a/fs/nfsd/lockd.c +++ b/fs/nfsd/lockd.c @@ -25,18 +25,22 @@ * Note: we hold the dentry use count while the file is open. */ static __be32 -nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp) +nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp, + int mode) { __be32 nfserr; + int access; struct svc_fh fh; /* must initialize before using! but maxsize doesn't matter */ fh_init(&fh,0); fh.fh_handle.fh_size = f->size; - memcpy((char*)&fh.fh_handle.fh_base, f->data, f->size); + memcpy(&fh.fh_handle.fh_raw, f->data, f->size); fh.fh_export = NULL; - nfserr = nfsd_open(rqstp, &fh, S_IFREG, NFSD_MAY_LOCK, filp); + access = (mode == O_WRONLY) ? NFSD_MAY_WRITE : NFSD_MAY_READ; + access |= NFSD_MAY_LOCK; + nfserr = nfsd_open(rqstp, &fh, S_IFREG, access, filp); fh_put(&fh); /* We return nlm error codes as nlm doesn't know * about nfsd, but nfsd does know about nlm.. diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 2baf32311e00..8c854ba3285b 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -10,6 +10,8 @@ #include <net/net_namespace.h> #include <net/netns/generic.h> +#include <linux/percpu_counter.h> +#include <linux/siphash.h> /* Hash tables for nfs4_clientid state */ #define CLIENT_HASH_BITS 4 @@ -21,6 +23,14 @@ struct cld_net; struct nfsd4_client_tracking_ops; +enum { + /* cache misses due only to checksum comparison failures */ + NFSD_NET_PAYLOAD_MISSES, + /* amount of memory (in bytes) currently consumed by the DRC */ + NFSD_NET_DRC_MEM_USAGE, + NFSD_NET_COUNTERS_NUM +}; + /* * Represents a nfsd "container". With respect to nfsv4 state tracking, the * fields of interest are the *_id_hashtbls and the *_name_tree. These track @@ -42,9 +52,6 @@ struct nfsd_net { bool grace_ended; time64_t boot_time; - /* internal mount of the "nfsd" pseudofilesystem: */ - struct vfsmount *nfsd_mnt; - struct dentry *nfsd_client_dir; /* @@ -102,9 +109,8 @@ struct nfsd_net { bool nfsd_net_up; bool lockd_up; - /* Time of server startup */ - struct timespec64 nfssvc_boot; - seqlock_t boot_lock; + seqlock_t writeverf_lock; + unsigned char writeverf[8]; /* * Max number of connections this nfsd container will allow. Defaults @@ -117,9 +123,13 @@ struct nfsd_net { u32 clverifier_counter; struct svc_serv *nfsd_serv; - - wait_queue_head_t ntf_wq; - atomic_t ntf_refcnt; + /* When a listening socket is added to nfsd, keep_active is set + * and this justifies a reference on nfsd_serv. This stops + * nfsd_serv from being freed. When the number of threads is + * set, keep_active is cleared and the reference is dropped. So + * when the last thread exits, the service will be destroyed. + */ + int keep_active; /* * clientid and stateid data for construction of net unique COPY @@ -139,7 +149,6 @@ struct nfsd_net { * Duplicate reply cache */ struct nfsd_drc_bucket *drc_hashtbl; - struct kmem_cache *drc_slab; /* max number of entries allowed in the cache */ unsigned int max_drc_entries; @@ -150,20 +159,16 @@ struct nfsd_net { /* * Stats and other tracking of on the duplicate reply cache. - * These fields and the "rc" fields in nfsdstats are modified - * with only the per-bucket cache lock, which isn't really safe - * and should be fixed if we want the statistics to be - * completely accurate. + * The longest_chain* fields are modified with only the per-bucket + * cache lock, which isn't really safe and should be fixed if we want + * these statistics to be completely accurate. */ /* total number of entries */ atomic_t num_drc_entries; - /* cache misses due only to checksum comparison failures */ - unsigned int payload_misses; - - /* amount of memory (in bytes) currently consumed by the DRC */ - unsigned int drc_mem_usage; + /* Per-netns stats counters */ + struct percpu_counter counter[NFSD_NET_COUNTERS_NUM]; /* longest hash chain seen */ unsigned int longest_chain; @@ -172,6 +177,25 @@ struct nfsd_net { unsigned int longest_chain_cachesize; struct shrinker nfsd_reply_cache_shrinker; + + /* tracking server-to-server copy mounts */ + spinlock_t nfsd_ssc_lock; + struct list_head nfsd_ssc_mount_list; + wait_queue_head_t nfsd_ssc_waitq; + + /* utsname taken from the process that starts the server */ + char nfsd_name[UNX_MAXNODENAME+1]; + + struct nfsd_fcache_disposal *fcache_disposal; + + siphash_key_t siphash_key; + + atomic_t nfs4_client_count; + int nfs4_max_clients; + + atomic_t nfsd_courtesy_clients; + struct shrinker nfsd_client_shrinker; + struct delayed_work nfsd_shrinker_work; }; /* Simple check to find out if a given net was properly initialized */ @@ -181,6 +205,6 @@ extern void nfsd_netns_free_versions(struct nfsd_net *nn); extern unsigned int nfsd_net_id; -void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn); -void nfsd_reset_boot_verifier(struct nfsd_net *nn); +void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn); +void nfsd_reset_write_verifier(struct nfsd_net *nn); #endif /* __NFSD_NETNS_H__ */ diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index cbab1d2d8a75..13e6e6897f6c 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -14,7 +14,6 @@ #include "vfs.h" #define NFSDDBG_FACILITY NFSDDBG_PROC -#define RETURN_STATUS(st) { resp->status = (st); return (st); } /* * NULL call. @@ -22,7 +21,7 @@ static __be32 nfsacld_proc_null(struct svc_rqst *rqstp) { - return nfs_ok; + return rpc_success; } /* @@ -35,24 +34,25 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst *rqstp) struct posix_acl *acl; struct inode *inode; svc_fh *fh; - __be32 nfserr = 0; dprintk("nfsd: GETACL(2acl) %s\n", SVCFH_fmt(&argp->fh)); fh = fh_copy(&resp->fh, &argp->fh); - nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP); - if (nfserr) - RETURN_STATUS(nfserr); + resp->status = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP); + if (resp->status != nfs_ok) + goto out; inode = d_inode(fh->fh_dentry); - if (argp->mask & ~NFS_ACL_MASK) - RETURN_STATUS(nfserr_inval); + if (argp->mask & ~NFS_ACL_MASK) { + resp->status = nfserr_inval; + goto out; + } resp->mask = argp->mask; - nfserr = fh_getattr(fh, &resp->stat); - if (nfserr) - RETURN_STATUS(nfserr); + resp->status = fh_getattr(fh, &resp->stat); + if (resp->status != nfs_ok) + goto out; if (resp->mask & (NFS_ACL|NFS_ACLCNT)) { acl = get_acl(inode, ACL_TYPE_ACCESS); @@ -61,7 +61,7 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst *rqstp) acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); } if (IS_ERR(acl)) { - nfserr = nfserrno(PTR_ERR(acl)); + resp->status = nfserrno(PTR_ERR(acl)); goto fail; } resp->acl_access = acl; @@ -71,19 +71,20 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst *rqstp) of a non-directory! */ acl = get_acl(inode, ACL_TYPE_DEFAULT); if (IS_ERR(acl)) { - nfserr = nfserrno(PTR_ERR(acl)); + resp->status = nfserrno(PTR_ERR(acl)); goto fail; } resp->acl_default = acl; } /* resp->acl_{access,default} are released in nfssvc_release_getacl. */ - RETURN_STATUS(0); +out: + return rpc_success; fail: posix_acl_release(resp->acl_access); posix_acl_release(resp->acl_default); - RETURN_STATUS(nfserr); + goto out; } /* @@ -95,14 +96,13 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst *rqstp) struct nfsd_attrstat *resp = rqstp->rq_resp; struct inode *inode; svc_fh *fh; - __be32 nfserr = 0; int error; dprintk("nfsd: SETACL(2acl) %s\n", SVCFH_fmt(&argp->fh)); fh = fh_copy(&resp->fh, &argp->fh); - nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_SATTR); - if (nfserr) + resp->status = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_SATTR); + if (resp->status != nfs_ok) goto out; inode = d_inode(fh->fh_dentry); @@ -111,32 +111,35 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst *rqstp) if (error) goto out_errno; - fh_lock(fh); + inode_lock(inode); - error = set_posix_acl(inode, ACL_TYPE_ACCESS, argp->acl_access); + error = set_posix_acl(&init_user_ns, inode, ACL_TYPE_ACCESS, + argp->acl_access); if (error) goto out_drop_lock; - error = set_posix_acl(inode, ACL_TYPE_DEFAULT, argp->acl_default); + error = set_posix_acl(&init_user_ns, inode, ACL_TYPE_DEFAULT, + argp->acl_default); if (error) goto out_drop_lock; - fh_unlock(fh); + inode_unlock(inode); fh_drop_write(fh); - nfserr = fh_getattr(fh, &resp->stat); + resp->status = fh_getattr(fh, &resp->stat); out: /* argp->acl_{access,default} may have been allocated in nfssvc_decode_setaclargs. */ posix_acl_release(argp->acl_access); posix_acl_release(argp->acl_default); - return nfserr; + return rpc_success; + out_drop_lock: - fh_unlock(fh); + inode_unlock(inode); fh_drop_write(fh); out_errno: - nfserr = nfserrno(error); + resp->status = nfserrno(error); goto out; } @@ -147,15 +150,16 @@ static __be32 nfsacld_proc_getattr(struct svc_rqst *rqstp) { struct nfsd_fhandle *argp = rqstp->rq_argp; struct nfsd_attrstat *resp = rqstp->rq_resp; - __be32 nfserr; + dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh)); fh_copy(&resp->fh, &argp->fh); - nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP); - if (nfserr) - return nfserr; - nfserr = fh_getattr(&resp->fh, &resp->stat); - return nfserr; + resp->status = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP); + if (resp->status != nfs_ok) + goto out; + resp->status = fh_getattr(&resp->fh, &resp->stat); +out: + return rpc_success; } /* @@ -165,7 +169,6 @@ static __be32 nfsacld_proc_access(struct svc_rqst *rqstp) { struct nfsd3_accessargs *argp = rqstp->rq_argp; struct nfsd3_accessres *resp = rqstp->rq_resp; - __be32 nfserr; dprintk("nfsd: ACCESS(2acl) %s 0x%x\n", SVCFH_fmt(&argp->fh), @@ -173,152 +176,127 @@ static __be32 nfsacld_proc_access(struct svc_rqst *rqstp) fh_copy(&resp->fh, &argp->fh); resp->access = argp->access; - nfserr = nfsd_access(rqstp, &resp->fh, &resp->access, NULL); - if (nfserr) - return nfserr; - nfserr = fh_getattr(&resp->fh, &resp->stat); - return nfserr; + resp->status = nfsd_access(rqstp, &resp->fh, &resp->access, NULL); + if (resp->status != nfs_ok) + goto out; + resp->status = fh_getattr(&resp->fh, &resp->stat); +out: + return rpc_success; } /* * XDR decode functions */ -static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p) + +static bool +nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_getaclargs *argp = rqstp->rq_argp; - p = nfs2svc_decode_fh(p, &argp->fh); - if (!p) - return 0; - argp->mask = ntohl(*p); p++; + if (!svcxdr_decode_fhandle(xdr, &argp->fh)) + return false; + if (xdr_stream_decode_u32(xdr, &argp->mask) < 0) + return false; - return xdr_argsize_check(rqstp, p); + return true; } - -static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p) +static bool +nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_setaclargs *argp = rqstp->rq_argp; - struct kvec *head = rqstp->rq_arg.head; - unsigned int base; - int n; - - p = nfs2svc_decode_fh(p, &argp->fh); - if (!p) - return 0; - argp->mask = ntohl(*p++); - if (argp->mask & ~NFS_ACL_MASK || - !xdr_argsize_check(rqstp, p)) - return 0; - - base = (char *)p - (char *)head->iov_base; - n = nfsacl_decode(&rqstp->rq_arg, base, NULL, - (argp->mask & NFS_ACL) ? - &argp->acl_access : NULL); - if (n > 0) - n = nfsacl_decode(&rqstp->rq_arg, base + n, NULL, - (argp->mask & NFS_DFACL) ? - &argp->acl_default : NULL); - return (n > 0); -} -static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p) -{ - struct nfsd_fhandle *argp = rqstp->rq_argp; - - p = nfs2svc_decode_fh(p, &argp->fh); - if (!p) - return 0; - return xdr_argsize_check(rqstp, p); + if (!svcxdr_decode_fhandle(xdr, &argp->fh)) + return false; + if (xdr_stream_decode_u32(xdr, &argp->mask) < 0) + return false; + if (argp->mask & ~NFS_ACL_MASK) + return false; + if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_ACL) ? + &argp->acl_access : NULL)) + return false; + if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_DFACL) ? + &argp->acl_default : NULL)) + return false; + + return true; } -static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p) +static bool +nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct nfsd3_accessargs *argp = rqstp->rq_argp; + struct nfsd3_accessargs *args = rqstp->rq_argp; - p = nfs2svc_decode_fh(p, &argp->fh); - if (!p) - return 0; - argp->access = ntohl(*p++); + if (!svcxdr_decode_fhandle(xdr, &args->fh)) + return false; + if (xdr_stream_decode_u32(xdr, &args->access) < 0) + return false; - return xdr_argsize_check(rqstp, p); + return true; } /* * XDR encode functions */ -/* - * There must be an encoding function for void results so svc_process - * will work properly. - */ -static int nfsaclsvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p) -{ - return xdr_ressize_check(rqstp, p); -} - /* GETACL */ -static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p) +static bool +nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_getaclres *resp = rqstp->rq_resp; struct dentry *dentry = resp->fh.fh_dentry; struct inode *inode; - struct kvec *head = rqstp->rq_res.head; - unsigned int base; - int n; int w; - /* - * Since this is version 2, the check for nfserr in - * nfsd_dispatch actually ensures the following cannot happen. - * However, it seems fragile to depend on that. - */ + if (!svcxdr_encode_stat(xdr, resp->status)) + return false; + if (dentry == NULL || d_really_is_negative(dentry)) - return 0; + return true; inode = d_inode(dentry); - p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat); - *p++ = htonl(resp->mask); - if (!xdr_ressize_check(rqstp, p)) - return 0; - base = (char *)p - (char *)head->iov_base; + if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) + return false; + if (xdr_stream_encode_u32(xdr, resp->mask) < 0) + return false; rqstp->rq_res.page_len = w = nfsacl_size( (resp->mask & NFS_ACL) ? resp->acl_access : NULL, (resp->mask & NFS_DFACL) ? resp->acl_default : NULL); while (w > 0) { if (!*(rqstp->rq_next_page++)) - return 0; + return true; w -= PAGE_SIZE; } - n = nfsacl_encode(&rqstp->rq_res, base, inode, - resp->acl_access, - resp->mask & NFS_ACL, 0); - if (n > 0) - n = nfsacl_encode(&rqstp->rq_res, base + n, inode, - resp->acl_default, - resp->mask & NFS_DFACL, - NFS_ACL_DEFAULT); - return (n > 0); -} - -static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p) -{ - struct nfsd_attrstat *resp = rqstp->rq_resp; + if (!nfs_stream_encode_acl(xdr, inode, resp->acl_access, + resp->mask & NFS_ACL, 0)) + return false; + if (!nfs_stream_encode_acl(xdr, inode, resp->acl_default, + resp->mask & NFS_DFACL, NFS_ACL_DEFAULT)) + return false; - p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat); - return xdr_ressize_check(rqstp, p); + return true; } /* ACCESS */ -static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p) +static bool +nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_accessres *resp = rqstp->rq_resp; - p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat); - *p++ = htonl(resp->access); - return xdr_ressize_check(rqstp, p); + if (!svcxdr_encode_stat(xdr, resp->status)) + return false; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) + return false; + if (xdr_stream_encode_u32(xdr, resp->access) < 0) + return false; + break; + } + + return true; } /* @@ -333,13 +311,6 @@ static void nfsaclsvc_release_getacl(struct svc_rqst *rqstp) posix_acl_release(resp->acl_default); } -static void nfsaclsvc_release_attrstat(struct svc_rqst *rqstp) -{ - struct nfsd_attrstat *resp = rqstp->rq_resp; - - fh_put(&resp->fh); -} - static void nfsaclsvc_release_access(struct svc_rqst *rqstp) { struct nfsd3_accessres *resp = rqstp->rq_resp; @@ -347,36 +318,73 @@ static void nfsaclsvc_release_access(struct svc_rqst *rqstp) fh_put(&resp->fh); } -#define nfsaclsvc_decode_voidargs NULL -#define nfsaclsvc_release_void NULL -#define nfsd3_fhandleargs nfsd_fhandle -#define nfsd3_attrstatres nfsd_attrstat -#define nfsd3_voidres nfsd3_voidargs struct nfsd3_voidargs { int dummy; }; -#define PROC(name, argt, rest, relt, cache, respsize) \ -{ \ - .pc_func = nfsacld_proc_##name, \ - .pc_decode = nfsaclsvc_decode_##argt##args, \ - .pc_encode = nfsaclsvc_encode_##rest##res, \ - .pc_release = nfsaclsvc_release_##relt, \ - .pc_argsize = sizeof(struct nfsd3_##argt##args), \ - .pc_ressize = sizeof(struct nfsd3_##rest##res), \ - .pc_cachetype = cache, \ - .pc_xdrressize = respsize, \ -} - #define ST 1 /* status*/ #define AT 21 /* attributes */ #define pAT (1+AT) /* post attributes - conditional */ #define ACL (1+NFS_ACL_MAX_ENTRIES*3) /* Access Control List */ -static const struct svc_procedure nfsd_acl_procedures2[] = { - PROC(null, void, void, void, RC_NOCACHE, ST), - PROC(getacl, getacl, getacl, getacl, RC_NOCACHE, ST+1+2*(1+ACL)), - PROC(setacl, setacl, attrstat, attrstat, RC_NOCACHE, ST+AT), - PROC(getattr, fhandle, attrstat, attrstat, RC_NOCACHE, ST+AT), - PROC(access, access, access, access, RC_NOCACHE, ST+AT+1), +static const struct svc_procedure nfsd_acl_procedures2[5] = { + [ACLPROC2_NULL] = { + .pc_func = nfsacld_proc_null, + .pc_decode = nfssvc_decode_voidarg, + .pc_encode = nfssvc_encode_voidres, + .pc_argsize = sizeof(struct nfsd_voidargs), + .pc_argzero = sizeof(struct nfsd_voidargs), + .pc_ressize = sizeof(struct nfsd_voidres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST, + .pc_name = "NULL", + }, + [ACLPROC2_GETACL] = { + .pc_func = nfsacld_proc_getacl, + .pc_decode = nfsaclsvc_decode_getaclargs, + .pc_encode = nfsaclsvc_encode_getaclres, + .pc_release = nfsaclsvc_release_getacl, + .pc_argsize = sizeof(struct nfsd3_getaclargs), + .pc_argzero = sizeof(struct nfsd3_getaclargs), + .pc_ressize = sizeof(struct nfsd3_getaclres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+1+2*(1+ACL), + .pc_name = "GETACL", + }, + [ACLPROC2_SETACL] = { + .pc_func = nfsacld_proc_setacl, + .pc_decode = nfsaclsvc_decode_setaclargs, + .pc_encode = nfssvc_encode_attrstatres, + .pc_release = nfssvc_release_attrstat, + .pc_argsize = sizeof(struct nfsd3_setaclargs), + .pc_argzero = sizeof(struct nfsd3_setaclargs), + .pc_ressize = sizeof(struct nfsd_attrstat), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+AT, + .pc_name = "SETACL", + }, + [ACLPROC2_GETATTR] = { + .pc_func = nfsacld_proc_getattr, + .pc_decode = nfssvc_decode_fhandleargs, + .pc_encode = nfssvc_encode_attrstatres, + .pc_release = nfssvc_release_attrstat, + .pc_argsize = sizeof(struct nfsd_fhandle), + .pc_argzero = sizeof(struct nfsd_fhandle), + .pc_ressize = sizeof(struct nfsd_attrstat), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+AT, + .pc_name = "GETATTR", + }, + [ACLPROC2_ACCESS] = { + .pc_func = nfsacld_proc_access, + .pc_decode = nfsaclsvc_decode_accessargs, + .pc_encode = nfsaclsvc_encode_accessres, + .pc_release = nfsaclsvc_release_access, + .pc_argsize = sizeof(struct nfsd3_accessargs), + .pc_argzero = sizeof(struct nfsd3_accessargs), + .pc_ressize = sizeof(struct nfsd3_accessres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+AT+1, + .pc_name = "SETATTR", + }, }; static unsigned int nfsd_acl_count2[ARRAY_SIZE(nfsd_acl_procedures2)]; diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 13bca4a2f89d..2fb9ee356455 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -13,15 +13,13 @@ #include "xdr3.h" #include "vfs.h" -#define RETURN_STATUS(st) { resp->status = (st); return (st); } - /* * NULL call. */ static __be32 nfsd3_proc_null(struct svc_rqst *rqstp) { - return nfs_ok; + return rpc_success; } /* @@ -34,17 +32,18 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst *rqstp) struct posix_acl *acl; struct inode *inode; svc_fh *fh; - __be32 nfserr = 0; fh = fh_copy(&resp->fh, &argp->fh); - nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP); - if (nfserr) - RETURN_STATUS(nfserr); + resp->status = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP); + if (resp->status != nfs_ok) + goto out; inode = d_inode(fh->fh_dentry); - if (argp->mask & ~NFS_ACL_MASK) - RETURN_STATUS(nfserr_inval); + if (argp->mask & ~NFS_ACL_MASK) { + resp->status = nfserr_inval; + goto out; + } resp->mask = argp->mask; if (resp->mask & (NFS_ACL|NFS_ACLCNT)) { @@ -54,7 +53,7 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst *rqstp) acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); } if (IS_ERR(acl)) { - nfserr = nfserrno(PTR_ERR(acl)); + resp->status = nfserrno(PTR_ERR(acl)); goto fail; } resp->acl_access = acl; @@ -64,19 +63,20 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst *rqstp) of a non-directory! */ acl = get_acl(inode, ACL_TYPE_DEFAULT); if (IS_ERR(acl)) { - nfserr = nfserrno(PTR_ERR(acl)); + resp->status = nfserrno(PTR_ERR(acl)); goto fail; } resp->acl_default = acl; } /* resp->acl_{access,default} are released in nfs3svc_release_getacl. */ - RETURN_STATUS(0); +out: + return rpc_success; fail: posix_acl_release(resp->acl_access); posix_acl_release(resp->acl_default); - RETURN_STATUS(nfserr); + goto out; } /* @@ -88,12 +88,11 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst *rqstp) struct nfsd3_attrstat *resp = rqstp->rq_resp; struct inode *inode; svc_fh *fh; - __be32 nfserr = 0; int error; fh = fh_copy(&resp->fh, &argp->fh); - nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_SATTR); - if (nfserr) + resp->status = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_SATTR); + if (resp->status != nfs_ok) goto out; inode = d_inode(fh->fh_dentry); @@ -102,66 +101,64 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst *rqstp) if (error) goto out_errno; - fh_lock(fh); + inode_lock(inode); - error = set_posix_acl(inode, ACL_TYPE_ACCESS, argp->acl_access); + error = set_posix_acl(&init_user_ns, inode, ACL_TYPE_ACCESS, + argp->acl_access); if (error) goto out_drop_lock; - error = set_posix_acl(inode, ACL_TYPE_DEFAULT, argp->acl_default); + error = set_posix_acl(&init_user_ns, inode, ACL_TYPE_DEFAULT, + argp->acl_default); out_drop_lock: - fh_unlock(fh); + inode_unlock(inode); fh_drop_write(fh); out_errno: - nfserr = nfserrno(error); + resp->status = nfserrno(error); out: /* argp->acl_{access,default} may have been allocated in nfs3svc_decode_setaclargs. */ posix_acl_release(argp->acl_access); posix_acl_release(argp->acl_default); - RETURN_STATUS(nfserr); + return rpc_success; } /* * XDR decode functions */ -static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p) + +static bool +nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_getaclargs *args = rqstp->rq_argp; - p = nfs3svc_decode_fh(p, &args->fh); - if (!p) - return 0; - args->mask = ntohl(*p); p++; + if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) + return false; + if (xdr_stream_decode_u32(xdr, &args->mask) < 0) + return false; - return xdr_argsize_check(rqstp, p); + return true; } - -static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p) +static bool +nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - struct nfsd3_setaclargs *args = rqstp->rq_argp; - struct kvec *head = rqstp->rq_arg.head; - unsigned int base; - int n; + struct nfsd3_setaclargs *argp = rqstp->rq_argp; - p = nfs3svc_decode_fh(p, &args->fh); - if (!p) - return 0; - args->mask = ntohl(*p++); - if (args->mask & ~NFS_ACL_MASK || - !xdr_argsize_check(rqstp, p)) - return 0; - - base = (char *)p - (char *)head->iov_base; - n = nfsacl_decode(&rqstp->rq_arg, base, NULL, - (args->mask & NFS_ACL) ? - &args->acl_access : NULL); - if (n > 0) - n = nfsacl_decode(&rqstp->rq_arg, base + n, NULL, - (args->mask & NFS_DFACL) ? - &args->acl_default : NULL); - return (n > 0); + if (!svcxdr_decode_nfs_fh3(xdr, &argp->fh)) + return false; + if (xdr_stream_decode_u32(xdr, &argp->mask) < 0) + return false; + if (argp->mask & ~NFS_ACL_MASK) + return false; + if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_ACL) ? + &argp->acl_access : NULL)) + return false; + if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_DFACL) ? + &argp->acl_default : NULL)) + return false; + + return true; } /* @@ -169,30 +166,35 @@ static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p) */ /* GETACL */ -static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p) +static bool +nfs3svc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_getaclres *resp = rqstp->rq_resp; struct dentry *dentry = resp->fh.fh_dentry; + struct kvec *head = rqstp->rq_res.head; + struct inode *inode; + unsigned int base; + int n; + int w; - p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh); - if (resp->status == 0 && dentry && d_really_is_positive(dentry)) { - struct inode *inode = d_inode(dentry); - struct kvec *head = rqstp->rq_res.head; - unsigned int base; - int n; - int w; + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return false; + switch (resp->status) { + case nfs_ok: + inode = d_inode(dentry); + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) + return false; + if (xdr_stream_encode_u32(xdr, resp->mask) < 0) + return false; - *p++ = htonl(resp->mask); - if (!xdr_ressize_check(rqstp, p)) - return 0; - base = (char *)p - (char *)head->iov_base; + base = (char *)xdr->p - (char *)head->iov_base; rqstp->rq_res.page_len = w = nfsacl_size( (resp->mask & NFS_ACL) ? resp->acl_access : NULL, (resp->mask & NFS_DFACL) ? resp->acl_default : NULL); while (w > 0) { if (!*(rqstp->rq_next_page++)) - return 0; + return false; w -= PAGE_SIZE; } @@ -205,22 +207,24 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p) resp->mask & NFS_DFACL, NFS_ACL_DEFAULT); if (n <= 0) - return 0; - } else - if (!xdr_ressize_check(rqstp, p)) - return 0; + return false; + break; + default: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) + return false; + } - return 1; + return true; } /* SETACL */ -static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, __be32 *p) +static bool +nfs3svc_encode_setaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_attrstat *resp = rqstp->rq_resp; - p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh); - - return xdr_ressize_check(rqstp, p); + return svcxdr_encode_nfsstat3(xdr, resp->status) && + svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh); } /* @@ -235,33 +239,49 @@ static void nfs3svc_release_getacl(struct svc_rqst *rqstp) posix_acl_release(resp->acl_default); } -#define nfs3svc_decode_voidargs NULL -#define nfs3svc_release_void NULL -#define nfsd3_setaclres nfsd3_attrstat -#define nfsd3_voidres nfsd3_voidargs struct nfsd3_voidargs { int dummy; }; -#define PROC(name, argt, rest, relt, cache, respsize) \ -{ \ - .pc_func = nfsd3_proc_##name, \ - .pc_decode = nfs3svc_decode_##argt##args, \ - .pc_encode = nfs3svc_encode_##rest##res, \ - .pc_release = nfs3svc_release_##relt, \ - .pc_argsize = sizeof(struct nfsd3_##argt##args), \ - .pc_ressize = sizeof(struct nfsd3_##rest##res), \ - .pc_cachetype = cache, \ - .pc_xdrressize = respsize, \ -} - #define ST 1 /* status*/ #define AT 21 /* attributes */ #define pAT (1+AT) /* post attributes - conditional */ #define ACL (1+NFS_ACL_MAX_ENTRIES*3) /* Access Control List */ -static const struct svc_procedure nfsd_acl_procedures3[] = { - PROC(null, void, void, void, RC_NOCACHE, ST), - PROC(getacl, getacl, getacl, getacl, RC_NOCACHE, ST+1+2*(1+ACL)), - PROC(setacl, setacl, setacl, fhandle, RC_NOCACHE, ST+pAT), +static const struct svc_procedure nfsd_acl_procedures3[3] = { + [ACLPROC3_NULL] = { + .pc_func = nfsd3_proc_null, + .pc_decode = nfssvc_decode_voidarg, + .pc_encode = nfssvc_encode_voidres, + .pc_argsize = sizeof(struct nfsd_voidargs), + .pc_argzero = sizeof(struct nfsd_voidargs), + .pc_ressize = sizeof(struct nfsd_voidres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST, + .pc_name = "NULL", + }, + [ACLPROC3_GETACL] = { + .pc_func = nfsd3_proc_getacl, + .pc_decode = nfs3svc_decode_getaclargs, + .pc_encode = nfs3svc_encode_getaclres, + .pc_release = nfs3svc_release_getacl, + .pc_argsize = sizeof(struct nfsd3_getaclargs), + .pc_argzero = sizeof(struct nfsd3_getaclargs), + .pc_ressize = sizeof(struct nfsd3_getaclres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+1+2*(1+ACL), + .pc_name = "GETACL", + }, + [ACLPROC3_SETACL] = { + .pc_func = nfsd3_proc_setacl, + .pc_decode = nfs3svc_decode_setaclargs, + .pc_encode = nfs3svc_encode_setaclres, + .pc_release = nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_setaclargs), + .pc_argzero = sizeof(struct nfsd3_setaclargs), + .pc_ressize = sizeof(struct nfsd3_attrstat), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+pAT, + .pc_name = "SETACL", + }, }; static unsigned int nfsd_acl_count3[ARRAY_SIZE(nfsd_acl_procedures3)]; diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 288bc76b4574..923d9a80df92 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -8,6 +8,7 @@ #include <linux/fs.h> #include <linux/ext2_fs.h> #include <linux/magic.h> +#include <linux/namei.h> #include "cache.h" #include "xdr3.h" @@ -15,8 +16,6 @@ #define NFSDDBG_FACILITY NFSDDBG_PROC -#define RETURN_STATUS(st) { resp->status = (st); return (st); } - static int nfs3_ftypes[] = { 0, /* NF3NON */ S_IFREG, /* NF3REG */ @@ -34,7 +33,7 @@ static int nfs3_ftypes[] = { static __be32 nfsd3_proc_null(struct svc_rqst *rqstp) { - return nfs_ok; + return rpc_success; } /* @@ -45,20 +44,19 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp) { struct nfsd_fhandle *argp = rqstp->rq_argp; struct nfsd3_attrstat *resp = rqstp->rq_resp; - __be32 nfserr; dprintk("nfsd: GETATTR(3) %s\n", SVCFH_fmt(&argp->fh)); fh_copy(&resp->fh, &argp->fh); - nfserr = fh_verify(rqstp, &resp->fh, 0, - NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT); - if (nfserr) - RETURN_STATUS(nfserr); - - nfserr = fh_getattr(&resp->fh, &resp->stat); - - RETURN_STATUS(nfserr); + resp->status = fh_verify(rqstp, &resp->fh, 0, + NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT); + if (resp->status != nfs_ok) + goto out; + + resp->status = fh_getattr(&resp->fh, &resp->stat); +out: + return rpc_success; } /* @@ -69,15 +67,17 @@ nfsd3_proc_setattr(struct svc_rqst *rqstp) { struct nfsd3_sattrargs *argp = rqstp->rq_argp; struct nfsd3_attrstat *resp = rqstp->rq_resp; - __be32 nfserr; + struct nfsd_attrs attrs = { + .na_iattr = &argp->attrs, + }; dprintk("nfsd: SETATTR(3) %s\n", SVCFH_fmt(&argp->fh)); fh_copy(&resp->fh, &argp->fh); - nfserr = nfsd_setattr(rqstp, &resp->fh, &argp->attrs, - argp->check_guard, argp->guardtime); - RETURN_STATUS(nfserr); + resp->status = nfsd_setattr(rqstp, &resp->fh, &attrs, + argp->check_guard, argp->guardtime); + return rpc_success; } /* @@ -88,7 +88,6 @@ nfsd3_proc_lookup(struct svc_rqst *rqstp) { struct nfsd3_diropargs *argp = rqstp->rq_argp; struct nfsd3_diropres *resp = rqstp->rq_resp; - __be32 nfserr; dprintk("nfsd: LOOKUP(3) %s %.*s\n", SVCFH_fmt(&argp->fh), @@ -98,11 +97,10 @@ nfsd3_proc_lookup(struct svc_rqst *rqstp) fh_copy(&resp->dirfh, &argp->fh); fh_init(&resp->fh, NFS3_FHSIZE); - nfserr = nfsd_lookup(rqstp, &resp->dirfh, - argp->name, - argp->len, - &resp->fh); - RETURN_STATUS(nfserr); + resp->status = nfsd_lookup(rqstp, &resp->dirfh, + argp->name, argp->len, + &resp->fh); + return rpc_success; } /* @@ -113,7 +111,6 @@ nfsd3_proc_access(struct svc_rqst *rqstp) { struct nfsd3_accessargs *argp = rqstp->rq_argp; struct nfsd3_accessres *resp = rqstp->rq_resp; - __be32 nfserr; dprintk("nfsd: ACCESS(3) %s 0x%x\n", SVCFH_fmt(&argp->fh), @@ -121,8 +118,8 @@ nfsd3_proc_access(struct svc_rqst *rqstp) fh_copy(&resp->fh, &argp->fh); resp->access = argp->access; - nfserr = nfsd_access(rqstp, &resp->fh, &resp->access, NULL); - RETURN_STATUS(nfserr); + resp->status = nfsd_access(rqstp, &resp->fh, &resp->access, NULL); + return rpc_success; } /* @@ -131,17 +128,18 @@ nfsd3_proc_access(struct svc_rqst *rqstp) static __be32 nfsd3_proc_readlink(struct svc_rqst *rqstp) { - struct nfsd3_readlinkargs *argp = rqstp->rq_argp; + struct nfsd_fhandle *argp = rqstp->rq_argp; struct nfsd3_readlinkres *resp = rqstp->rq_resp; - __be32 nfserr; dprintk("nfsd: READLINK(3) %s\n", SVCFH_fmt(&argp->fh)); /* Read the symlink. */ fh_copy(&resp->fh, &argp->fh); resp->len = NFS3_MAXPATHLEN; - nfserr = nfsd_readlink(rqstp, &resp->fh, argp->buffer, &resp->len); - RETURN_STATUS(nfserr); + resp->pages = rqstp->rq_next_page++; + resp->status = nfsd_readlink(rqstp, &resp->fh, + page_address(*resp->pages), &resp->len); + return rpc_success; } /* @@ -152,29 +150,44 @@ nfsd3_proc_read(struct svc_rqst *rqstp) { struct nfsd3_readargs *argp = rqstp->rq_argp; struct nfsd3_readres *resp = rqstp->rq_resp; - __be32 nfserr; - u32 max_blocksize = svc_max_payload(rqstp); - unsigned long cnt = min(argp->count, max_blocksize); + unsigned int len; + int v; dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n", SVCFH_fmt(&argp->fh), (unsigned long) argp->count, (unsigned long long) argp->offset); + argp->count = min_t(u32, argp->count, svc_max_payload(rqstp)); + argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen); + if (argp->offset > (u64)OFFSET_MAX) + argp->offset = (u64)OFFSET_MAX; + if (argp->offset + argp->count > (u64)OFFSET_MAX) + argp->count = (u64)OFFSET_MAX - argp->offset; + + v = 0; + len = argp->count; + resp->pages = rqstp->rq_next_page; + while (len > 0) { + struct page *page = *(rqstp->rq_next_page++); + + rqstp->rq_vec[v].iov_base = page_address(page); + rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE); + len -= rqstp->rq_vec[v].iov_len; + v++; + } + /* Obtain buffer pointer for payload. * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof) * + 1 (xdr opaque byte count) = 26 */ - resp->count = cnt; + resp->count = argp->count; svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4); fh_copy(&resp->fh, &argp->fh); - nfserr = nfsd_read(rqstp, &resp->fh, - argp->offset, - rqstp->rq_vec, argp->vlen, - &resp->count, - &resp->eof); - RETURN_STATUS(nfserr); + resp->status = nfsd_read(rqstp, &resp->fh, argp->offset, + rqstp->rq_vec, v, &resp->count, &resp->eof); + return rpc_success; } /* @@ -185,7 +198,6 @@ nfsd3_proc_write(struct svc_rqst *rqstp) { struct nfsd3_writeargs *argp = rqstp->rq_argp; struct nfsd3_writeres *resp = rqstp->rq_resp; - __be32 nfserr; unsigned long cnt = argp->len; unsigned int nvecs; @@ -195,32 +207,154 @@ nfsd3_proc_write(struct svc_rqst *rqstp) (unsigned long long) argp->offset, argp->stable? " stable" : ""); + resp->status = nfserr_fbig; + if (argp->offset > (u64)OFFSET_MAX || + argp->offset + argp->len > (u64)OFFSET_MAX) + return rpc_success; + fh_copy(&resp->fh, &argp->fh); resp->committed = argp->stable; - nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages, - &argp->first, cnt); - if (!nvecs) - RETURN_STATUS(nfserr_io); - nfserr = nfsd_write(rqstp, &resp->fh, argp->offset, - rqstp->rq_vec, nvecs, &cnt, - resp->committed, resp->verf); + nvecs = svc_fill_write_vector(rqstp, &argp->payload); + + resp->status = nfsd_write(rqstp, &resp->fh, argp->offset, + rqstp->rq_vec, nvecs, &cnt, + resp->committed, resp->verf); resp->count = cnt; - RETURN_STATUS(nfserr); + return rpc_success; } /* - * With NFSv3, CREATE processing is a lot easier than with NFSv2. - * At least in theory; we'll see how it fares in practice when the - * first reports about SunOS compatibility problems start to pour in... + * Implement NFSv3's unchecked, guarded, and exclusive CREATE + * semantics for regular files. Except for the created file, + * this operation is stateless on the server. + * + * Upon return, caller must release @fhp and @resfhp. */ static __be32 +nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct svc_fh *resfhp, struct nfsd3_createargs *argp) +{ + struct iattr *iap = &argp->attrs; + struct dentry *parent, *child; + struct nfsd_attrs attrs = { + .na_iattr = iap, + }; + __u32 v_mtime, v_atime; + struct inode *inode; + __be32 status; + int host_err; + + if (isdotent(argp->name, argp->len)) + return nfserr_exist; + if (!(iap->ia_valid & ATTR_MODE)) + iap->ia_mode = 0; + + status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC); + if (status != nfs_ok) + return status; + + parent = fhp->fh_dentry; + inode = d_inode(parent); + + host_err = fh_want_write(fhp); + if (host_err) + return nfserrno(host_err); + + inode_lock_nested(inode, I_MUTEX_PARENT); + + child = lookup_one_len(argp->name, parent, argp->len); + if (IS_ERR(child)) { + status = nfserrno(PTR_ERR(child)); + goto out; + } + + if (d_really_is_negative(child)) { + status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE); + if (status != nfs_ok) + goto out; + } + + status = fh_compose(resfhp, fhp->fh_export, child, fhp); + if (status != nfs_ok) + goto out; + + v_mtime = 0; + v_atime = 0; + if (argp->createmode == NFS3_CREATE_EXCLUSIVE) { + u32 *verifier = (u32 *)argp->verf; + + /* + * Solaris 7 gets confused (bugid 4218508) if these have + * the high bit set, as do xfs filesystems without the + * "bigtime" feature. So just clear the high bits. + */ + v_mtime = verifier[0] & 0x7fffffff; + v_atime = verifier[1] & 0x7fffffff; + } + + if (d_really_is_positive(child)) { + status = nfs_ok; + + switch (argp->createmode) { + case NFS3_CREATE_UNCHECKED: + if (!d_is_reg(child)) + break; + iap->ia_valid &= ATTR_SIZE; + goto set_attr; + case NFS3_CREATE_GUARDED: + status = nfserr_exist; + break; + case NFS3_CREATE_EXCLUSIVE: + if (d_inode(child)->i_mtime.tv_sec == v_mtime && + d_inode(child)->i_atime.tv_sec == v_atime && + d_inode(child)->i_size == 0) { + break; + } + status = nfserr_exist; + } + goto out; + } + + if (!IS_POSIXACL(inode)) + iap->ia_mode &= ~current_umask(); + + fh_fill_pre_attrs(fhp); + host_err = vfs_create(&init_user_ns, inode, child, iap->ia_mode, true); + if (host_err < 0) { + status = nfserrno(host_err); + goto out; + } + fh_fill_post_attrs(fhp); + + /* A newly created file already has a file size of zero. */ + if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0)) + iap->ia_valid &= ~ATTR_SIZE; + if (argp->createmode == NFS3_CREATE_EXCLUSIVE) { + iap->ia_valid = ATTR_MTIME | ATTR_ATIME | + ATTR_MTIME_SET | ATTR_ATIME_SET; + iap->ia_mtime.tv_sec = v_mtime; + iap->ia_atime.tv_sec = v_atime; + iap->ia_mtime.tv_nsec = 0; + iap->ia_atime.tv_nsec = 0; + } + +set_attr: + status = nfsd_create_setattr(rqstp, fhp, resfhp, &attrs); + +out: + inode_unlock(inode); + if (child && !IS_ERR(child)) + dput(child); + fh_drop_write(fhp); + return status; +} + +static __be32 nfsd3_proc_create(struct svc_rqst *rqstp) { struct nfsd3_createargs *argp = rqstp->rq_argp; struct nfsd3_diropres *resp = rqstp->rq_resp; - svc_fh *dirfhp, *newfhp = NULL; - struct iattr *attr; - __be32 nfserr; + svc_fh *dirfhp, *newfhp; dprintk("nfsd: CREATE(3) %s %.*s\n", SVCFH_fmt(&argp->fh), @@ -229,23 +363,9 @@ nfsd3_proc_create(struct svc_rqst *rqstp) dirfhp = fh_copy(&resp->dirfh, &argp->fh); newfhp = fh_init(&resp->fh, NFS3_FHSIZE); - attr = &argp->attrs; - - /* Unfudge the mode bits */ - attr->ia_mode &= ~S_IFMT; - if (!(attr->ia_valid & ATTR_MODE)) { - attr->ia_valid |= ATTR_MODE; - attr->ia_mode = S_IFREG; - } else { - attr->ia_mode = (attr->ia_mode & ~S_IFMT) | S_IFREG; - } - /* Now create the file and set attributes */ - nfserr = do_nfsd_create(rqstp, dirfhp, argp->name, argp->len, - attr, newfhp, - argp->createmode, (u32 *)argp->verf, NULL, NULL); - - RETURN_STATUS(nfserr); + resp->status = nfsd3_create_file(rqstp, dirfhp, newfhp, argp); + return rpc_success; } /* @@ -256,7 +376,9 @@ nfsd3_proc_mkdir(struct svc_rqst *rqstp) { struct nfsd3_createargs *argp = rqstp->rq_argp; struct nfsd3_diropres *resp = rqstp->rq_resp; - __be32 nfserr; + struct nfsd_attrs attrs = { + .na_iattr = &argp->attrs, + }; dprintk("nfsd: MKDIR(3) %s %.*s\n", SVCFH_fmt(&argp->fh), @@ -266,10 +388,9 @@ nfsd3_proc_mkdir(struct svc_rqst *rqstp) argp->attrs.ia_valid &= ~ATTR_SIZE; fh_copy(&resp->dirfh, &argp->fh); fh_init(&resp->fh, NFS3_FHSIZE); - nfserr = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len, - &argp->attrs, S_IFDIR, 0, &resp->fh); - fh_unlock(&resp->dirfh); - RETURN_STATUS(nfserr); + resp->status = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len, + &attrs, S_IFDIR, 0, &resp->fh); + return rpc_success; } static __be32 @@ -277,18 +398,26 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp) { struct nfsd3_symlinkargs *argp = rqstp->rq_argp; struct nfsd3_diropres *resp = rqstp->rq_resp; - __be32 nfserr; + struct nfsd_attrs attrs = { + .na_iattr = &argp->attrs, + }; - if (argp->tlen == 0) - RETURN_STATUS(nfserr_inval); - if (argp->tlen > NFS3_MAXPATHLEN) - RETURN_STATUS(nfserr_nametoolong); + if (argp->tlen == 0) { + resp->status = nfserr_inval; + goto out; + } + if (argp->tlen > NFS3_MAXPATHLEN) { + resp->status = nfserr_nametoolong; + goto out; + } argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first, page_address(rqstp->rq_arg.pages[0]), argp->tlen); - if (IS_ERR(argp->tname)) - RETURN_STATUS(nfserrno(PTR_ERR(argp->tname))); + if (IS_ERR(argp->tname)) { + resp->status = nfserrno(PTR_ERR(argp->tname)); + goto out; + } dprintk("nfsd: SYMLINK(3) %s %.*s -> %.*s\n", SVCFH_fmt(&argp->ffh), @@ -297,10 +426,11 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp) fh_copy(&resp->dirfh, &argp->ffh); fh_init(&resp->fh, NFS3_FHSIZE); - nfserr = nfsd_symlink(rqstp, &resp->dirfh, argp->fname, argp->flen, - argp->tname, &resp->fh); + resp->status = nfsd_symlink(rqstp, &resp->dirfh, argp->fname, + argp->flen, argp->tname, &attrs, &resp->fh); kfree(argp->tname); - RETURN_STATUS(nfserr); +out: + return rpc_success; } /* @@ -311,7 +441,9 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp) { struct nfsd3_mknodargs *argp = rqstp->rq_argp; struct nfsd3_diropres *resp = rqstp->rq_resp; - __be32 nfserr; + struct nfsd_attrs attrs = { + .na_iattr = &argp->attrs, + }; int type; dev_t rdev = 0; @@ -323,22 +455,23 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp) fh_copy(&resp->dirfh, &argp->fh); fh_init(&resp->fh, NFS3_FHSIZE); - if (argp->ftype == 0 || argp->ftype >= NF3BAD) - RETURN_STATUS(nfserr_inval); if (argp->ftype == NF3CHR || argp->ftype == NF3BLK) { rdev = MKDEV(argp->major, argp->minor); if (MAJOR(rdev) != argp->major || - MINOR(rdev) != argp->minor) - RETURN_STATUS(nfserr_inval); - } else - if (argp->ftype != NF3SOCK && argp->ftype != NF3FIFO) - RETURN_STATUS(nfserr_inval); + MINOR(rdev) != argp->minor) { + resp->status = nfserr_inval; + goto out; + } + } else if (argp->ftype != NF3SOCK && argp->ftype != NF3FIFO) { + resp->status = nfserr_badtype; + goto out; + } type = nfs3_ftypes[argp->ftype]; - nfserr = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len, - &argp->attrs, type, rdev, &resp->fh); - fh_unlock(&resp->dirfh); - RETURN_STATUS(nfserr); + resp->status = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len, + &attrs, type, rdev, &resp->fh); +out: + return rpc_success; } /* @@ -349,7 +482,6 @@ nfsd3_proc_remove(struct svc_rqst *rqstp) { struct nfsd3_diropargs *argp = rqstp->rq_argp; struct nfsd3_attrstat *resp = rqstp->rq_resp; - __be32 nfserr; dprintk("nfsd: REMOVE(3) %s %.*s\n", SVCFH_fmt(&argp->fh), @@ -358,9 +490,9 @@ nfsd3_proc_remove(struct svc_rqst *rqstp) /* Unlink. -S_IFDIR means file must not be a directory */ fh_copy(&resp->fh, &argp->fh); - nfserr = nfsd_unlink(rqstp, &resp->fh, -S_IFDIR, argp->name, argp->len); - fh_unlock(&resp->fh); - RETURN_STATUS(nfserr); + resp->status = nfsd_unlink(rqstp, &resp->fh, -S_IFDIR, + argp->name, argp->len); + return rpc_success; } /* @@ -371,7 +503,6 @@ nfsd3_proc_rmdir(struct svc_rqst *rqstp) { struct nfsd3_diropargs *argp = rqstp->rq_argp; struct nfsd3_attrstat *resp = rqstp->rq_resp; - __be32 nfserr; dprintk("nfsd: RMDIR(3) %s %.*s\n", SVCFH_fmt(&argp->fh), @@ -379,9 +510,9 @@ nfsd3_proc_rmdir(struct svc_rqst *rqstp) argp->name); fh_copy(&resp->fh, &argp->fh); - nfserr = nfsd_unlink(rqstp, &resp->fh, S_IFDIR, argp->name, argp->len); - fh_unlock(&resp->fh); - RETURN_STATUS(nfserr); + resp->status = nfsd_unlink(rqstp, &resp->fh, S_IFDIR, + argp->name, argp->len); + return rpc_success; } static __be32 @@ -389,7 +520,6 @@ nfsd3_proc_rename(struct svc_rqst *rqstp) { struct nfsd3_renameargs *argp = rqstp->rq_argp; struct nfsd3_renameres *resp = rqstp->rq_resp; - __be32 nfserr; dprintk("nfsd: RENAME(3) %s %.*s ->\n", SVCFH_fmt(&argp->ffh), @@ -402,9 +532,9 @@ nfsd3_proc_rename(struct svc_rqst *rqstp) fh_copy(&resp->ffh, &argp->ffh); fh_copy(&resp->tfh, &argp->tfh); - nfserr = nfsd_rename(rqstp, &resp->ffh, argp->fname, argp->flen, - &resp->tfh, argp->tname, argp->tlen); - RETURN_STATUS(nfserr); + resp->status = nfsd_rename(rqstp, &resp->ffh, argp->fname, argp->flen, + &resp->tfh, argp->tname, argp->tlen); + return rpc_success; } static __be32 @@ -412,7 +542,6 @@ nfsd3_proc_link(struct svc_rqst *rqstp) { struct nfsd3_linkargs *argp = rqstp->rq_argp; struct nfsd3_linkres *resp = rqstp->rq_resp; - __be32 nfserr; dprintk("nfsd: LINK(3) %s ->\n", SVCFH_fmt(&argp->ffh)); @@ -423,9 +552,29 @@ nfsd3_proc_link(struct svc_rqst *rqstp) fh_copy(&resp->fh, &argp->ffh); fh_copy(&resp->tfh, &argp->tfh); - nfserr = nfsd_link(rqstp, &resp->tfh, argp->tname, argp->tlen, - &resp->fh); - RETURN_STATUS(nfserr); + resp->status = nfsd_link(rqstp, &resp->tfh, argp->tname, argp->tlen, + &resp->fh); + return rpc_success; +} + +static void nfsd3_init_dirlist_pages(struct svc_rqst *rqstp, + struct nfsd3_readdirres *resp, + u32 count) +{ + struct xdr_buf *buf = &resp->dirlist; + struct xdr_stream *xdr = &resp->xdr; + unsigned int sendbuf = min_t(unsigned int, rqstp->rq_res.buflen, + svc_max_payload(rqstp)); + + memset(buf, 0, sizeof(*buf)); + + /* Reserve room for the NULL ptr & eof flag (-2 words) */ + buf->buflen = clamp(count, (u32)(XDR_UNIT * 2), sendbuf); + buf->buflen -= XDR_UNIT * 2; + buf->pages = rqstp->rq_next_page; + rqstp->rq_next_page += (buf->buflen + PAGE_SIZE - 1) >> PAGE_SHIFT; + + xdr_init_encode_pages(xdr, buf, buf->pages, NULL); } /* @@ -436,56 +585,28 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp) { struct nfsd3_readdirargs *argp = rqstp->rq_argp; struct nfsd3_readdirres *resp = rqstp->rq_resp; - __be32 nfserr; - int count = 0; - struct page **p; - caddr_t page_addr = NULL; + loff_t offset; dprintk("nfsd: READDIR(3) %s %d bytes at %d\n", SVCFH_fmt(&argp->fh), argp->count, (u32) argp->cookie); - /* Make sure we've room for the NULL ptr & eof flag, and shrink to - * client read size */ - count = (argp->count >> 2) - 2; + nfsd3_init_dirlist_pages(rqstp, resp, argp->count); - /* Read directory and encode entries on the fly */ fh_copy(&resp->fh, &argp->fh); - - resp->buflen = count; resp->common.err = nfs_ok; - resp->buffer = argp->buffer; + resp->cookie_offset = 0; resp->rqstp = rqstp; - nfserr = nfsd_readdir(rqstp, &resp->fh, (loff_t*) &argp->cookie, - &resp->common, nfs3svc_encode_entry); + offset = argp->cookie; + resp->status = nfsd_readdir(rqstp, &resp->fh, &offset, + &resp->common, nfs3svc_encode_entry3); memcpy(resp->verf, argp->verf, 8); - count = 0; - for (p = rqstp->rq_respages + 1; p < rqstp->rq_next_page; p++) { - page_addr = page_address(*p); + nfs3svc_encode_cookie3(resp, offset); - if (((caddr_t)resp->buffer >= page_addr) && - ((caddr_t)resp->buffer < page_addr + PAGE_SIZE)) { - count += (caddr_t)resp->buffer - page_addr; - break; - } - count += PAGE_SIZE; - } - resp->count = count >> 2; - if (resp->offset) { - loff_t offset = argp->cookie; - - if (unlikely(resp->offset1)) { - /* we ended up with offset on a page boundary */ - *resp->offset = htonl(offset >> 32); - *resp->offset1 = htonl(offset & 0xffffffff); - resp->offset1 = NULL; - } else { - xdr_encode_hyper(resp->offset, offset); - } - resp->offset = NULL; - } + /* Recycle only pages that were part of the reply */ + rqstp->rq_next_page = resp->xdr.page_ptr + 1; - RETURN_STATUS(nfserr); + return rpc_success; } /* @@ -497,65 +618,39 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp) { struct nfsd3_readdirargs *argp = rqstp->rq_argp; struct nfsd3_readdirres *resp = rqstp->rq_resp; - __be32 nfserr; - int count = 0; loff_t offset; - struct page **p; - caddr_t page_addr = NULL; dprintk("nfsd: READDIR+(3) %s %d bytes at %d\n", SVCFH_fmt(&argp->fh), argp->count, (u32) argp->cookie); - /* Convert byte count to number of words (i.e. >> 2), - * and reserve room for the NULL ptr & eof flag (-2 words) */ - resp->count = (argp->count >> 2) - 2; + nfsd3_init_dirlist_pages(rqstp, resp, argp->count); - /* Read directory and encode entries on the fly */ fh_copy(&resp->fh, &argp->fh); - resp->common.err = nfs_ok; - resp->buffer = argp->buffer; - resp->buflen = resp->count; + resp->cookie_offset = 0; resp->rqstp = rqstp; offset = argp->cookie; - nfserr = fh_verify(rqstp, &resp->fh, S_IFDIR, NFSD_MAY_NOP); - if (nfserr) - RETURN_STATUS(nfserr); + resp->status = fh_verify(rqstp, &resp->fh, S_IFDIR, NFSD_MAY_NOP); + if (resp->status != nfs_ok) + goto out; - if (resp->fh.fh_export->ex_flags & NFSEXP_NOREADDIRPLUS) - RETURN_STATUS(nfserr_notsupp); + if (resp->fh.fh_export->ex_flags & NFSEXP_NOREADDIRPLUS) { + resp->status = nfserr_notsupp; + goto out; + } - nfserr = nfsd_readdir(rqstp, &resp->fh, - &offset, - &resp->common, - nfs3svc_encode_entry_plus); + resp->status = nfsd_readdir(rqstp, &resp->fh, &offset, + &resp->common, nfs3svc_encode_entryplus3); memcpy(resp->verf, argp->verf, 8); - for (p = rqstp->rq_respages + 1; p < rqstp->rq_next_page; p++) { - page_addr = page_address(*p); + nfs3svc_encode_cookie3(resp, offset); - if (((caddr_t)resp->buffer >= page_addr) && - ((caddr_t)resp->buffer < page_addr + PAGE_SIZE)) { - count += (caddr_t)resp->buffer - page_addr; - break; - } - count += PAGE_SIZE; - } - resp->count = count >> 2; - if (resp->offset) { - if (unlikely(resp->offset1)) { - /* we ended up with offset on a page boundary */ - *resp->offset = htonl(offset >> 32); - *resp->offset1 = htonl(offset & 0xffffffff); - resp->offset1 = NULL; - } else { - xdr_encode_hyper(resp->offset, offset); - } - resp->offset = NULL; - } + /* Recycle only pages that were part of the reply */ + rqstp->rq_next_page = resp->xdr.page_ptr + 1; - RETURN_STATUS(nfserr); +out: + return rpc_success; } /* @@ -566,14 +661,13 @@ nfsd3_proc_fsstat(struct svc_rqst *rqstp) { struct nfsd_fhandle *argp = rqstp->rq_argp; struct nfsd3_fsstatres *resp = rqstp->rq_resp; - __be32 nfserr; dprintk("nfsd: FSSTAT(3) %s\n", SVCFH_fmt(&argp->fh)); - nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats, 0); + resp->status = nfsd_statfs(rqstp, &argp->fh, &resp->stats, 0); fh_put(&argp->fh); - RETURN_STATUS(nfserr); + return rpc_success; } /* @@ -584,7 +678,6 @@ nfsd3_proc_fsinfo(struct svc_rqst *rqstp) { struct nfsd_fhandle *argp = rqstp->rq_argp; struct nfsd3_fsinfores *resp = rqstp->rq_resp; - __be32 nfserr; u32 max_blocksize = svc_max_payload(rqstp); dprintk("nfsd: FSINFO(3) %s\n", @@ -600,13 +693,13 @@ nfsd3_proc_fsinfo(struct svc_rqst *rqstp) resp->f_maxfilesize = ~(u32) 0; resp->f_properties = NFS3_FSF_DEFAULT; - nfserr = fh_verify(rqstp, &argp->fh, 0, - NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT); + resp->status = fh_verify(rqstp, &argp->fh, 0, + NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT); /* Check special features of the file system. May request * different read/write sizes for file systems known to have * problems with large blocks */ - if (nfserr == 0) { + if (resp->status == nfs_ok) { struct super_block *sb = argp->fh.fh_dentry->d_sb; /* Note that we don't care for remote fs's here */ @@ -617,7 +710,7 @@ nfsd3_proc_fsinfo(struct svc_rqst *rqstp) } fh_put(&argp->fh); - RETURN_STATUS(nfserr); + return rpc_success; } /* @@ -628,7 +721,6 @@ nfsd3_proc_pathconf(struct svc_rqst *rqstp) { struct nfsd_fhandle *argp = rqstp->rq_argp; struct nfsd3_pathconfres *resp = rqstp->rq_resp; - __be32 nfserr; dprintk("nfsd: PATHCONF(3) %s\n", SVCFH_fmt(&argp->fh)); @@ -641,9 +733,9 @@ nfsd3_proc_pathconf(struct svc_rqst *rqstp) resp->p_case_insensitive = 0; resp->p_case_preserving = 1; - nfserr = fh_verify(rqstp, &argp->fh, 0, NFSD_MAY_NOP); + resp->status = fh_verify(rqstp, &argp->fh, 0, NFSD_MAY_NOP); - if (nfserr == 0) { + if (resp->status == nfs_ok) { struct super_block *sb = argp->fh.fh_dentry->d_sb; /* Note that we don't care for remote fs's here */ @@ -660,10 +752,9 @@ nfsd3_proc_pathconf(struct svc_rqst *rqstp) } fh_put(&argp->fh); - RETURN_STATUS(nfserr); + return rpc_success; } - /* * Commit a file (range) to stable storage. */ @@ -672,21 +763,16 @@ nfsd3_proc_commit(struct svc_rqst *rqstp) { struct nfsd3_commitargs *argp = rqstp->rq_argp; struct nfsd3_commitres *resp = rqstp->rq_resp; - __be32 nfserr; dprintk("nfsd: COMMIT(3) %s %u@%Lu\n", SVCFH_fmt(&argp->fh), argp->count, (unsigned long long) argp->offset); - if (argp->offset > NFS_OFFSET_MAX) - RETURN_STATUS(nfserr_inval); - fh_copy(&resp->fh, &argp->fh); - nfserr = nfsd_commit(rqstp, &resp->fh, argp->offset, argp->count, - resp->verf); - - RETURN_STATUS(nfserr); + resp->status = nfsd_commit(rqstp, &resp->fh, argp->offset, + argp->count, resp->verf); + return rpc_success; } @@ -694,18 +780,14 @@ nfsd3_proc_commit(struct svc_rqst *rqstp) * NFSv3 Server procedures. * Only the results of non-idempotent operations are cached. */ -#define nfs3svc_decode_fhandleargs nfs3svc_decode_fhandle #define nfs3svc_encode_attrstatres nfs3svc_encode_attrstat #define nfs3svc_encode_wccstatres nfs3svc_encode_wccstat #define nfsd3_mkdirargs nfsd3_createargs #define nfsd3_readdirplusargs nfsd3_readdirargs #define nfsd3_fhandleargs nfsd_fhandle -#define nfsd3_fhandleres nfsd3_attrstat #define nfsd3_attrstatres nfsd3_attrstat #define nfsd3_wccstatres nfsd3_attrstat #define nfsd3_createres nfsd3_diropres -#define nfsd3_voidres nfsd3_voidargs -struct nfsd3_voidargs { int dummy; }; #define ST 1 /* status*/ #define FH 17 /* filehandle with length */ @@ -716,21 +798,26 @@ struct nfsd3_voidargs { int dummy; }; static const struct svc_procedure nfsd_procedures3[22] = { [NFS3PROC_NULL] = { .pc_func = nfsd3_proc_null, - .pc_encode = nfs3svc_encode_voidres, - .pc_argsize = sizeof(struct nfsd3_voidargs), - .pc_ressize = sizeof(struct nfsd3_voidres), + .pc_decode = nfssvc_decode_voidarg, + .pc_encode = nfssvc_encode_voidres, + .pc_argsize = sizeof(struct nfsd_voidargs), + .pc_argzero = sizeof(struct nfsd_voidargs), + .pc_ressize = sizeof(struct nfsd_voidres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST, + .pc_name = "NULL", }, [NFS3PROC_GETATTR] = { .pc_func = nfsd3_proc_getattr, .pc_decode = nfs3svc_decode_fhandleargs, - .pc_encode = nfs3svc_encode_attrstatres, + .pc_encode = nfs3svc_encode_getattrres, .pc_release = nfs3svc_release_fhandle, - .pc_argsize = sizeof(struct nfsd3_fhandleargs), + .pc_argsize = sizeof(struct nfsd_fhandle), + .pc_argzero = sizeof(struct nfsd_fhandle), .pc_ressize = sizeof(struct nfsd3_attrstatres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+AT, + .pc_name = "GETATTR", }, [NFS3PROC_SETATTR] = { .pc_func = nfsd3_proc_setattr, @@ -738,19 +825,23 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_wccstatres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_sattrargs), + .pc_argzero = sizeof(struct nfsd3_sattrargs), .pc_ressize = sizeof(struct nfsd3_wccstatres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+WC, + .pc_name = "SETATTR", }, [NFS3PROC_LOOKUP] = { .pc_func = nfsd3_proc_lookup, .pc_decode = nfs3svc_decode_diropargs, - .pc_encode = nfs3svc_encode_diropres, + .pc_encode = nfs3svc_encode_lookupres, .pc_release = nfs3svc_release_fhandle2, .pc_argsize = sizeof(struct nfsd3_diropargs), + .pc_argzero = sizeof(struct nfsd3_diropargs), .pc_ressize = sizeof(struct nfsd3_diropres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+FH+pAT+pAT, + .pc_name = "LOOKUP", }, [NFS3PROC_ACCESS] = { .pc_func = nfsd3_proc_access, @@ -758,19 +849,23 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_accessres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_accessargs), + .pc_argzero = sizeof(struct nfsd3_accessargs), .pc_ressize = sizeof(struct nfsd3_accessres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+pAT+1, + .pc_name = "ACCESS", }, [NFS3PROC_READLINK] = { .pc_func = nfsd3_proc_readlink, - .pc_decode = nfs3svc_decode_readlinkargs, + .pc_decode = nfs3svc_decode_fhandleargs, .pc_encode = nfs3svc_encode_readlinkres, .pc_release = nfs3svc_release_fhandle, - .pc_argsize = sizeof(struct nfsd3_readlinkargs), + .pc_argsize = sizeof(struct nfsd_fhandle), + .pc_argzero = sizeof(struct nfsd_fhandle), .pc_ressize = sizeof(struct nfsd3_readlinkres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+pAT+1+NFS3_MAXPATHLEN/4, + .pc_name = "READLINK", }, [NFS3PROC_READ] = { .pc_func = nfsd3_proc_read, @@ -778,9 +873,11 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_readres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_readargs), + .pc_argzero = sizeof(struct nfsd3_readargs), .pc_ressize = sizeof(struct nfsd3_readres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+pAT+4+NFSSVC_MAXBLKSIZE/4, + .pc_name = "READ", }, [NFS3PROC_WRITE] = { .pc_func = nfsd3_proc_write, @@ -788,9 +885,11 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_writeres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_writeargs), + .pc_argzero = sizeof(struct nfsd3_writeargs), .pc_ressize = sizeof(struct nfsd3_writeres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+WC+4, + .pc_name = "WRITE", }, [NFS3PROC_CREATE] = { .pc_func = nfsd3_proc_create, @@ -798,9 +897,11 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_createres, .pc_release = nfs3svc_release_fhandle2, .pc_argsize = sizeof(struct nfsd3_createargs), + .pc_argzero = sizeof(struct nfsd3_createargs), .pc_ressize = sizeof(struct nfsd3_createres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+(1+FH+pAT)+WC, + .pc_name = "CREATE", }, [NFS3PROC_MKDIR] = { .pc_func = nfsd3_proc_mkdir, @@ -808,9 +909,11 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_createres, .pc_release = nfs3svc_release_fhandle2, .pc_argsize = sizeof(struct nfsd3_mkdirargs), + .pc_argzero = sizeof(struct nfsd3_mkdirargs), .pc_ressize = sizeof(struct nfsd3_createres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+(1+FH+pAT)+WC, + .pc_name = "MKDIR", }, [NFS3PROC_SYMLINK] = { .pc_func = nfsd3_proc_symlink, @@ -818,9 +921,11 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_createres, .pc_release = nfs3svc_release_fhandle2, .pc_argsize = sizeof(struct nfsd3_symlinkargs), + .pc_argzero = sizeof(struct nfsd3_symlinkargs), .pc_ressize = sizeof(struct nfsd3_createres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+(1+FH+pAT)+WC, + .pc_name = "SYMLINK", }, [NFS3PROC_MKNOD] = { .pc_func = nfsd3_proc_mknod, @@ -828,9 +933,11 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_createres, .pc_release = nfs3svc_release_fhandle2, .pc_argsize = sizeof(struct nfsd3_mknodargs), + .pc_argzero = sizeof(struct nfsd3_mknodargs), .pc_ressize = sizeof(struct nfsd3_createres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+(1+FH+pAT)+WC, + .pc_name = "MKNOD", }, [NFS3PROC_REMOVE] = { .pc_func = nfsd3_proc_remove, @@ -838,9 +945,11 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_wccstatres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_diropargs), + .pc_argzero = sizeof(struct nfsd3_diropargs), .pc_ressize = sizeof(struct nfsd3_wccstatres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+WC, + .pc_name = "REMOVE", }, [NFS3PROC_RMDIR] = { .pc_func = nfsd3_proc_rmdir, @@ -848,9 +957,11 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_wccstatres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_diropargs), + .pc_argzero = sizeof(struct nfsd3_diropargs), .pc_ressize = sizeof(struct nfsd3_wccstatres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+WC, + .pc_name = "RMDIR", }, [NFS3PROC_RENAME] = { .pc_func = nfsd3_proc_rename, @@ -858,9 +969,11 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_renameres, .pc_release = nfs3svc_release_fhandle2, .pc_argsize = sizeof(struct nfsd3_renameargs), + .pc_argzero = sizeof(struct nfsd3_renameargs), .pc_ressize = sizeof(struct nfsd3_renameres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+WC+WC, + .pc_name = "RENAME", }, [NFS3PROC_LINK] = { .pc_func = nfsd3_proc_link, @@ -868,9 +981,11 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_linkres, .pc_release = nfs3svc_release_fhandle2, .pc_argsize = sizeof(struct nfsd3_linkargs), + .pc_argzero = sizeof(struct nfsd3_linkargs), .pc_ressize = sizeof(struct nfsd3_linkres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+pAT+WC, + .pc_name = "LINK", }, [NFS3PROC_READDIR] = { .pc_func = nfsd3_proc_readdir, @@ -878,8 +993,10 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_readdirres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_readdirargs), + .pc_argzero = sizeof(struct nfsd3_readdirargs), .pc_ressize = sizeof(struct nfsd3_readdirres), .pc_cachetype = RC_NOCACHE, + .pc_name = "READDIR", }, [NFS3PROC_READDIRPLUS] = { .pc_func = nfsd3_proc_readdirplus, @@ -887,35 +1004,43 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_readdirres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_readdirplusargs), + .pc_argzero = sizeof(struct nfsd3_readdirplusargs), .pc_ressize = sizeof(struct nfsd3_readdirres), .pc_cachetype = RC_NOCACHE, + .pc_name = "READDIRPLUS", }, [NFS3PROC_FSSTAT] = { .pc_func = nfsd3_proc_fsstat, .pc_decode = nfs3svc_decode_fhandleargs, .pc_encode = nfs3svc_encode_fsstatres, .pc_argsize = sizeof(struct nfsd3_fhandleargs), + .pc_argzero = sizeof(struct nfsd3_fhandleargs), .pc_ressize = sizeof(struct nfsd3_fsstatres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+pAT+2*6+1, + .pc_name = "FSSTAT", }, [NFS3PROC_FSINFO] = { .pc_func = nfsd3_proc_fsinfo, .pc_decode = nfs3svc_decode_fhandleargs, .pc_encode = nfs3svc_encode_fsinfores, .pc_argsize = sizeof(struct nfsd3_fhandleargs), + .pc_argzero = sizeof(struct nfsd3_fhandleargs), .pc_ressize = sizeof(struct nfsd3_fsinfores), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+pAT+12, + .pc_name = "FSINFO", }, [NFS3PROC_PATHCONF] = { .pc_func = nfsd3_proc_pathconf, .pc_decode = nfs3svc_decode_fhandleargs, .pc_encode = nfs3svc_encode_pathconfres, .pc_argsize = sizeof(struct nfsd3_fhandleargs), + .pc_argzero = sizeof(struct nfsd3_fhandleargs), .pc_ressize = sizeof(struct nfsd3_pathconfres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+pAT+6, + .pc_name = "PATHCONF", }, [NFS3PROC_COMMIT] = { .pc_func = nfsd3_proc_commit, @@ -923,9 +1048,11 @@ static const struct svc_procedure nfsd_procedures3[22] = { .pc_encode = nfs3svc_encode_commitres, .pc_release = nfs3svc_release_fhandle, .pc_argsize = sizeof(struct nfsd3_commitargs), + .pc_argzero = sizeof(struct nfsd3_commitargs), .pc_ressize = sizeof(struct nfsd3_commitres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+WC+2, + .pc_name = "COMMIT", }, }; diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index aae514d40b64..3308dd671ef0 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -14,13 +14,26 @@ #include "netns.h" #include "vfs.h" -#define NFSDDBG_FACILITY NFSDDBG_XDR +/* + * Force construction of an empty post-op attr + */ +static const struct svc_fh nfs3svc_null_fh = { + .fh_no_wcc = true, +}; +/* + * time_delta. {1, 0} means the server is accurate only + * to the nearest second. + */ +static const struct timespec64 nfs3svc_time_delta = { + .tv_sec = 1, + .tv_nsec = 0, +}; /* * Mapping of S_IF* types to NFS file types */ -static u32 nfs3_ftypes[] = { +static const u32 nfs3_ftypes[] = { NF3NON, NF3FIFO, NF3CHR, NF3BAD, NF3DIR, NF3BAD, NF3BLK, NF3BAD, NF3REG, NF3BAD, NF3LNK, NF3BAD, @@ -29,810 +42,938 @@ static u32 nfs3_ftypes[] = { /* - * XDR functions for basic NFS types + * Basic NFSv3 data types (RFC 1813 Sections 2.5 and 2.6) */ + static __be32 * -encode_time3(__be32 *p, struct timespec64 *time) +encode_nfstime3(__be32 *p, const struct timespec64 *time) { - *p++ = htonl((u32) time->tv_sec); *p++ = htonl(time->tv_nsec); + *p++ = cpu_to_be32((u32)time->tv_sec); + *p++ = cpu_to_be32(time->tv_nsec); + return p; } -static __be32 * -decode_time3(__be32 *p, struct timespec64 *time) +static bool +svcxdr_decode_nfstime3(struct xdr_stream *xdr, struct timespec64 *timep) { - time->tv_sec = ntohl(*p++); - time->tv_nsec = ntohl(*p++); - return p; + __be32 *p; + + p = xdr_inline_decode(xdr, XDR_UNIT * 2); + if (!p) + return false; + timep->tv_sec = be32_to_cpup(p++); + timep->tv_nsec = be32_to_cpup(p); + + return true; } -static __be32 * -decode_fh(__be32 *p, struct svc_fh *fhp) +/** + * svcxdr_decode_nfs_fh3 - Decode an NFSv3 file handle + * @xdr: XDR stream positioned at an undecoded NFSv3 FH + * @fhp: OUT: filled-in server file handle + * + * Return values: + * %false: The encoded file handle was not valid + * %true: @fhp has been initialized + */ +bool +svcxdr_decode_nfs_fh3(struct xdr_stream *xdr, struct svc_fh *fhp) { - unsigned int size; + __be32 *p; + u32 size; + + if (xdr_stream_decode_u32(xdr, &size) < 0) + return false; + if (size == 0 || size > NFS3_FHSIZE) + return false; + p = xdr_inline_decode(xdr, size); + if (!p) + return false; fh_init(fhp, NFS3_FHSIZE); - size = ntohl(*p++); - if (size > NFS3_FHSIZE) - return NULL; - - memcpy(&fhp->fh_handle.fh_base, p, size); fhp->fh_handle.fh_size = size; - return p + XDR_QUADLEN(size); + memcpy(&fhp->fh_handle.fh_raw, p, size); + + return true; } -/* Helper function for NFSv3 ACL code */ -__be32 *nfs3svc_decode_fh(__be32 *p, struct svc_fh *fhp) +/** + * svcxdr_encode_nfsstat3 - Encode an NFSv3 status code + * @xdr: XDR stream + * @status: status value to encode + * + * Return values: + * %false: Send buffer space was exhausted + * %true: Success + */ +bool +svcxdr_encode_nfsstat3(struct xdr_stream *xdr, __be32 status) { - return decode_fh(p, fhp); + __be32 *p; + + p = xdr_reserve_space(xdr, sizeof(status)); + if (!p) + return false; + *p = status; + + return true; } -static __be32 * -encode_fh(__be32 *p, struct svc_fh *fhp) +static bool +svcxdr_encode_nfs_fh3(struct xdr_stream *xdr, const struct svc_fh *fhp) +{ + u32 size = fhp->fh_handle.fh_size; + __be32 *p; + + p = xdr_reserve_space(xdr, XDR_UNIT + size); + if (!p) + return false; + *p++ = cpu_to_be32(size); + if (size) + p[XDR_QUADLEN(size) - 1] = 0; + memcpy(p, &fhp->fh_handle.fh_raw, size); + + return true; +} + +static bool +svcxdr_encode_post_op_fh3(struct xdr_stream *xdr, const struct svc_fh *fhp) { - unsigned int size = fhp->fh_handle.fh_size; - *p++ = htonl(size); - if (size) p[XDR_QUADLEN(size)-1]=0; - memcpy(p, &fhp->fh_handle.fh_base, size); - return p + XDR_QUADLEN(size); + if (xdr_stream_encode_item_present(xdr) < 0) + return false; + if (!svcxdr_encode_nfs_fh3(xdr, fhp)) + return false; + + return true; } -/* - * Decode a file name and make sure that the path contains - * no slashes or null bytes. - */ -static __be32 * -decode_filename(__be32 *p, char **namp, unsigned int *lenp) +static bool +svcxdr_encode_cookieverf3(struct xdr_stream *xdr, const __be32 *verf) { - char *name; - unsigned int i; - - if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS3_MAXNAMLEN)) != NULL) { - for (i = 0, name = *namp; i < *lenp; i++, name++) { - if (*name == '\0' || *name == '/') - return NULL; - } + __be32 *p; + + p = xdr_reserve_space(xdr, NFS3_COOKIEVERFSIZE); + if (!p) + return false; + memcpy(p, verf, NFS3_COOKIEVERFSIZE); + + return true; +} + +static bool +svcxdr_encode_writeverf3(struct xdr_stream *xdr, const __be32 *verf) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, NFS3_WRITEVERFSIZE); + if (!p) + return false; + memcpy(p, verf, NFS3_WRITEVERFSIZE); + + return true; +} + +static bool +svcxdr_decode_filename3(struct xdr_stream *xdr, char **name, unsigned int *len) +{ + u32 size, i; + __be32 *p; + char *c; + + if (xdr_stream_decode_u32(xdr, &size) < 0) + return false; + if (size == 0 || size > NFS3_MAXNAMLEN) + return false; + p = xdr_inline_decode(xdr, size); + if (!p) + return false; + + *len = size; + *name = (char *)p; + for (i = 0, c = *name; i < size; i++, c++) { + if (*c == '\0' || *c == '/') + return false; } - return p; + return true; } -static __be32 * -decode_sattr3(__be32 *p, struct iattr *iap, struct user_namespace *userns) +static bool +svcxdr_decode_diropargs3(struct xdr_stream *xdr, struct svc_fh *fhp, + char **name, unsigned int *len) { - u32 tmp; + return svcxdr_decode_nfs_fh3(xdr, fhp) && + svcxdr_decode_filename3(xdr, name, len); +} + +static bool +svcxdr_decode_sattr3(struct svc_rqst *rqstp, struct xdr_stream *xdr, + struct iattr *iap) +{ + u32 set_it; iap->ia_valid = 0; - if (*p++) { + if (xdr_stream_decode_bool(xdr, &set_it) < 0) + return false; + if (set_it) { + u32 mode; + + if (xdr_stream_decode_u32(xdr, &mode) < 0) + return false; iap->ia_valid |= ATTR_MODE; - iap->ia_mode = ntohl(*p++); + iap->ia_mode = mode; } - if (*p++) { - iap->ia_uid = make_kuid(userns, ntohl(*p++)); + if (xdr_stream_decode_bool(xdr, &set_it) < 0) + return false; + if (set_it) { + u32 uid; + + if (xdr_stream_decode_u32(xdr, &uid) < 0) + return false; + iap->ia_uid = make_kuid(nfsd_user_namespace(rqstp), uid); if (uid_valid(iap->ia_uid)) iap->ia_valid |= ATTR_UID; } - if (*p++) { - iap->ia_gid = make_kgid(userns, ntohl(*p++)); + if (xdr_stream_decode_bool(xdr, &set_it) < 0) + return false; + if (set_it) { + u32 gid; + + if (xdr_stream_decode_u32(xdr, &gid) < 0) + return false; + iap->ia_gid = make_kgid(nfsd_user_namespace(rqstp), gid); if (gid_valid(iap->ia_gid)) iap->ia_valid |= ATTR_GID; } - if (*p++) { - u64 newsize; + if (xdr_stream_decode_bool(xdr, &set_it) < 0) + return false; + if (set_it) { + u64 newsize; + if (xdr_stream_decode_u64(xdr, &newsize) < 0) + return false; iap->ia_valid |= ATTR_SIZE; - p = xdr_decode_hyper(p, &newsize); - iap->ia_size = min_t(u64, newsize, NFS_OFFSET_MAX); + iap->ia_size = newsize; } - if ((tmp = ntohl(*p++)) == 1) { /* set to server time */ + if (xdr_stream_decode_u32(xdr, &set_it) < 0) + return false; + switch (set_it) { + case DONT_CHANGE: + break; + case SET_TO_SERVER_TIME: iap->ia_valid |= ATTR_ATIME; - } else if (tmp == 2) { /* set to client time */ + break; + case SET_TO_CLIENT_TIME: + if (!svcxdr_decode_nfstime3(xdr, &iap->ia_atime)) + return false; iap->ia_valid |= ATTR_ATIME | ATTR_ATIME_SET; - iap->ia_atime.tv_sec = ntohl(*p++); - iap->ia_atime.tv_nsec = ntohl(*p++); + break; + default: + return false; } - if ((tmp = ntohl(*p++)) == 1) { /* set to server time */ + if (xdr_stream_decode_u32(xdr, &set_it) < 0) + return false; + switch (set_it) { + case DONT_CHANGE: + break; + case SET_TO_SERVER_TIME: iap->ia_valid |= ATTR_MTIME; - } else if (tmp == 2) { /* set to client time */ + break; + case SET_TO_CLIENT_TIME: + if (!svcxdr_decode_nfstime3(xdr, &iap->ia_mtime)) + return false; iap->ia_valid |= ATTR_MTIME | ATTR_MTIME_SET; - iap->ia_mtime.tv_sec = ntohl(*p++); - iap->ia_mtime.tv_nsec = ntohl(*p++); + break; + default: + return false; } - return p; + + return true; } -static __be32 *encode_fsid(__be32 *p, struct svc_fh *fhp) +static bool +svcxdr_decode_sattrguard3(struct xdr_stream *xdr, struct nfsd3_sattrargs *args) { - u64 f; - switch(fsid_source(fhp)) { - default: - case FSIDSOURCE_DEV: - p = xdr_encode_hyper(p, (u64)huge_encode_dev - (fhp->fh_dentry->d_sb->s_dev)); - break; - case FSIDSOURCE_FSID: - p = xdr_encode_hyper(p, (u64) fhp->fh_export->ex_fsid); - break; - case FSIDSOURCE_UUID: - f = ((u64*)fhp->fh_export->ex_uuid)[0]; - f ^= ((u64*)fhp->fh_export->ex_uuid)[1]; - p = xdr_encode_hyper(p, f); - break; - } - return p; + __be32 *p; + u32 check; + + if (xdr_stream_decode_bool(xdr, &check) < 0) + return false; + if (check) { + p = xdr_inline_decode(xdr, XDR_UNIT * 2); + if (!p) + return false; + args->check_guard = 1; + args->guardtime = be32_to_cpup(p); + } else + args->check_guard = 0; + + return true; } -static __be32 * -encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, - struct kstat *stat) +static bool +svcxdr_decode_specdata3(struct xdr_stream *xdr, struct nfsd3_mknodargs *args) { - struct user_namespace *userns = nfsd_user_namespace(rqstp); - *p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]); - *p++ = htonl((u32) (stat->mode & S_IALLUGO)); - *p++ = htonl((u32) stat->nlink); - *p++ = htonl((u32) from_kuid_munged(userns, stat->uid)); - *p++ = htonl((u32) from_kgid_munged(userns, stat->gid)); - if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) { - p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN); - } else { - p = xdr_encode_hyper(p, (u64) stat->size); - } - p = xdr_encode_hyper(p, ((u64)stat->blocks) << 9); - *p++ = htonl((u32) MAJOR(stat->rdev)); - *p++ = htonl((u32) MINOR(stat->rdev)); - p = encode_fsid(p, fhp); - p = xdr_encode_hyper(p, stat->ino); - p = encode_time3(p, &stat->atime); - p = encode_time3(p, &stat->mtime); - p = encode_time3(p, &stat->ctime); + __be32 *p; - return p; + p = xdr_inline_decode(xdr, XDR_UNIT * 2); + if (!p) + return false; + args->major = be32_to_cpup(p++); + args->minor = be32_to_cpup(p); + + return true; } -static __be32 * -encode_saved_post_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) +static bool +svcxdr_decode_devicedata3(struct svc_rqst *rqstp, struct xdr_stream *xdr, + struct nfsd3_mknodargs *args) { - /* Attributes to follow */ - *p++ = xdr_one; - return encode_fattr3(rqstp, p, fhp, &fhp->fh_post_attr); + return svcxdr_decode_sattr3(rqstp, xdr, &args->attrs) && + svcxdr_decode_specdata3(xdr, args); } -/* - * Encode post-operation attributes. - * The inode may be NULL if the call failed because of a stale file - * handle. In this case, no attributes are returned. - */ -static __be32 * -encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) +static bool +svcxdr_encode_fattr3(struct svc_rqst *rqstp, struct xdr_stream *xdr, + const struct svc_fh *fhp, const struct kstat *stat) { - struct dentry *dentry = fhp->fh_dentry; - if (dentry && d_really_is_positive(dentry)) { - __be32 err; - struct kstat stat; - - err = fh_getattr(fhp, &stat); - if (!err) { - *p++ = xdr_one; /* attributes follow */ - lease_get_mtime(d_inode(dentry), &stat.mtime); - return encode_fattr3(rqstp, p, fhp, &stat); - } + struct user_namespace *userns = nfsd_user_namespace(rqstp); + __be32 *p; + u64 fsid; + + p = xdr_reserve_space(xdr, XDR_UNIT * 21); + if (!p) + return false; + + *p++ = cpu_to_be32(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]); + *p++ = cpu_to_be32((u32)(stat->mode & S_IALLUGO)); + *p++ = cpu_to_be32((u32)stat->nlink); + *p++ = cpu_to_be32((u32)from_kuid_munged(userns, stat->uid)); + *p++ = cpu_to_be32((u32)from_kgid_munged(userns, stat->gid)); + if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) + p = xdr_encode_hyper(p, (u64)NFS3_MAXPATHLEN); + else + p = xdr_encode_hyper(p, (u64)stat->size); + + /* used */ + p = xdr_encode_hyper(p, ((u64)stat->blocks) << 9); + + /* rdev */ + *p++ = cpu_to_be32((u32)MAJOR(stat->rdev)); + *p++ = cpu_to_be32((u32)MINOR(stat->rdev)); + + switch(fsid_source(fhp)) { + case FSIDSOURCE_FSID: + fsid = (u64)fhp->fh_export->ex_fsid; + break; + case FSIDSOURCE_UUID: + fsid = ((u64 *)fhp->fh_export->ex_uuid)[0]; + fsid ^= ((u64 *)fhp->fh_export->ex_uuid)[1]; + break; + default: + fsid = (u64)huge_encode_dev(fhp->fh_dentry->d_sb->s_dev); } - *p++ = xdr_zero; - return p; + p = xdr_encode_hyper(p, fsid); + + /* fileid */ + p = xdr_encode_hyper(p, stat->ino); + + p = encode_nfstime3(p, &stat->atime); + p = encode_nfstime3(p, &stat->mtime); + encode_nfstime3(p, &stat->ctime); + + return true; } -/* Helper for NFSv3 ACLs */ -__be32 * -nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) +static bool +svcxdr_encode_wcc_attr(struct xdr_stream *xdr, const struct svc_fh *fhp) { - return encode_post_op_attr(rqstp, p, fhp); + __be32 *p; + + p = xdr_reserve_space(xdr, XDR_UNIT * 6); + if (!p) + return false; + p = xdr_encode_hyper(p, (u64)fhp->fh_pre_size); + p = encode_nfstime3(p, &fhp->fh_pre_mtime); + encode_nfstime3(p, &fhp->fh_pre_ctime); + + return true; } -/* - * Enocde weak cache consistency data - */ -static __be32 * -encode_wcc_data(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) +static bool +svcxdr_encode_pre_op_attr(struct xdr_stream *xdr, const struct svc_fh *fhp) { - struct dentry *dentry = fhp->fh_dentry; - - if (dentry && d_really_is_positive(dentry) && fhp->fh_post_saved) { - if (fhp->fh_pre_saved) { - *p++ = xdr_one; - p = xdr_encode_hyper(p, (u64) fhp->fh_pre_size); - p = encode_time3(p, &fhp->fh_pre_mtime); - p = encode_time3(p, &fhp->fh_pre_ctime); - } else { - *p++ = xdr_zero; - } - return encode_saved_post_attr(rqstp, p, fhp); + if (!fhp->fh_pre_saved) { + if (xdr_stream_encode_item_absent(xdr) < 0) + return false; + return true; } - /* no pre- or post-attrs */ - *p++ = xdr_zero; - return encode_post_op_attr(rqstp, p, fhp); + + if (xdr_stream_encode_item_present(xdr) < 0) + return false; + return svcxdr_encode_wcc_attr(xdr, fhp); } -/* - * Fill in the pre_op attr for the wcc data +/** + * svcxdr_encode_post_op_attr - Encode NFSv3 post-op attributes + * @rqstp: Context of a completed RPC transaction + * @xdr: XDR stream + * @fhp: File handle to encode + * + * Return values: + * %false: Send buffer space was exhausted + * %true: Success */ -void fill_pre_wcc(struct svc_fh *fhp) +bool +svcxdr_encode_post_op_attr(struct svc_rqst *rqstp, struct xdr_stream *xdr, + const struct svc_fh *fhp) { - struct inode *inode; - struct kstat stat; - __be32 err; + struct dentry *dentry = fhp->fh_dentry; + struct kstat stat; - if (fhp->fh_pre_saved) - return; + /* + * The inode may be NULL if the call failed because of a + * stale file handle. In this case, no attributes are + * returned. + */ + if (fhp->fh_no_wcc || !dentry || !d_really_is_positive(dentry)) + goto no_post_op_attrs; + if (fh_getattr(fhp, &stat) != nfs_ok) + goto no_post_op_attrs; - inode = d_inode(fhp->fh_dentry); - err = fh_getattr(fhp, &stat); - if (err) { - /* Grab the times from inode anyway */ - stat.mtime = inode->i_mtime; - stat.ctime = inode->i_ctime; - stat.size = inode->i_size; - } + if (xdr_stream_encode_item_present(xdr) < 0) + return false; + lease_get_mtime(d_inode(dentry), &stat.mtime); + if (!svcxdr_encode_fattr3(rqstp, xdr, fhp, &stat)) + return false; + + return true; - fhp->fh_pre_mtime = stat.mtime; - fhp->fh_pre_ctime = stat.ctime; - fhp->fh_pre_size = stat.size; - fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode); - fhp->fh_pre_saved = true; +no_post_op_attrs: + return xdr_stream_encode_item_absent(xdr) > 0; } /* - * Fill in the post_op attr for the wcc data + * Encode weak cache consistency data */ -void fill_post_wcc(struct svc_fh *fhp) +static bool +svcxdr_encode_wcc_data(struct svc_rqst *rqstp, struct xdr_stream *xdr, + const struct svc_fh *fhp) { - __be32 err; - - if (fhp->fh_post_saved) - printk("nfsd: inode locked twice during operation.\n"); - - err = fh_getattr(fhp, &fhp->fh_post_attr); - fhp->fh_post_change = nfsd4_change_attribute(&fhp->fh_post_attr, - d_inode(fhp->fh_dentry)); - if (err) { - fhp->fh_post_saved = false; - /* Grab the ctime anyway - set_change_info might use it */ - fhp->fh_post_attr.ctime = d_inode(fhp->fh_dentry)->i_ctime; - } else - fhp->fh_post_saved = true; + struct dentry *dentry = fhp->fh_dentry; + + if (!dentry || !d_really_is_positive(dentry) || !fhp->fh_post_saved) + goto neither; + + /* before */ + if (!svcxdr_encode_pre_op_attr(xdr, fhp)) + return false; + + /* after */ + if (xdr_stream_encode_item_present(xdr) < 0) + return false; + if (!svcxdr_encode_fattr3(rqstp, xdr, fhp, &fhp->fh_post_attr)) + return false; + + return true; + +neither: + if (xdr_stream_encode_item_absent(xdr) < 0) + return false; + if (!svcxdr_encode_post_op_attr(rqstp, xdr, fhp)) + return false; + + return true; } /* * XDR decode functions */ -int -nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p) + +bool +nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd_fhandle *args = rqstp->rq_argp; - p = decode_fh(p, &args->fh); - if (!p) - return 0; - return xdr_argsize_check(rqstp, p); + return svcxdr_decode_nfs_fh3(xdr, &args->fh); } -int -nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_sattrargs *args = rqstp->rq_argp; - p = decode_fh(p, &args->fh); - if (!p) - return 0; - p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp)); - - if ((args->check_guard = ntohl(*p++)) != 0) { - struct timespec64 time; - p = decode_time3(p, &time); - args->guardtime = time.tv_sec; - } - - return xdr_argsize_check(rqstp, p); + return svcxdr_decode_nfs_fh3(xdr, &args->fh) && + svcxdr_decode_sattr3(rqstp, xdr, &args->attrs) && + svcxdr_decode_sattrguard3(xdr, args); } -int -nfs3svc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_diropargs *args = rqstp->rq_argp; - if (!(p = decode_fh(p, &args->fh)) - || !(p = decode_filename(p, &args->name, &args->len))) - return 0; - - return xdr_argsize_check(rqstp, p); + return svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len); } -int -nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_accessargs *args = rqstp->rq_argp; - p = decode_fh(p, &args->fh); - if (!p) - return 0; - args->access = ntohl(*p++); + if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) + return false; + if (xdr_stream_decode_u32(xdr, &args->access) < 0) + return false; - return xdr_argsize_check(rqstp, p); + return true; } -int -nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_readargs *args = rqstp->rq_argp; - unsigned int len; - int v; - u32 max_blocksize = svc_max_payload(rqstp); - - p = decode_fh(p, &args->fh); - if (!p) - return 0; - p = xdr_decode_hyper(p, &args->offset); - - args->count = ntohl(*p++); - len = min(args->count, max_blocksize); - /* set up the kvec */ - v=0; - while (len > 0) { - struct page *p = *(rqstp->rq_next_page++); + if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) + return false; + if (xdr_stream_decode_u64(xdr, &args->offset) < 0) + return false; + if (xdr_stream_decode_u32(xdr, &args->count) < 0) + return false; - rqstp->rq_vec[v].iov_base = page_address(p); - rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE); - len -= rqstp->rq_vec[v].iov_len; - v++; - } - args->vlen = v; - return xdr_argsize_check(rqstp, p); + return true; } -int -nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_writeargs *args = rqstp->rq_argp; - unsigned int len, hdr, dlen; u32 max_blocksize = svc_max_payload(rqstp); - struct kvec *head = rqstp->rq_arg.head; - struct kvec *tail = rqstp->rq_arg.tail; - p = decode_fh(p, &args->fh); - if (!p) - return 0; - p = xdr_decode_hyper(p, &args->offset); - - args->count = ntohl(*p++); - args->stable = ntohl(*p++); - len = args->len = ntohl(*p++); - if ((void *)p > head->iov_base + head->iov_len) - return 0; - /* - * The count must equal the amount of data passed. - */ - if (args->count != args->len) - return 0; + if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) + return false; + if (xdr_stream_decode_u64(xdr, &args->offset) < 0) + return false; + if (xdr_stream_decode_u32(xdr, &args->count) < 0) + return false; + if (xdr_stream_decode_u32(xdr, &args->stable) < 0) + return false; - /* - * Check to make sure that we got the right number of - * bytes. - */ - hdr = (void*)p - head->iov_base; - dlen = head->iov_len + rqstp->rq_arg.page_len + tail->iov_len - hdr; - /* - * Round the length of the data which was specified up to - * the next multiple of XDR units and then compare that - * against the length which was actually received. - * Note that when RPCSEC/GSS (for example) is used, the - * data buffer can be padded so dlen might be larger - * than required. It must never be smaller. - */ - if (dlen < XDR_QUADLEN(len)*4) - return 0; + /* opaque data */ + if (xdr_stream_decode_u32(xdr, &args->len) < 0) + return false; + /* request sanity */ + if (args->count != args->len) + return false; if (args->count > max_blocksize) { args->count = max_blocksize; - len = args->len = max_blocksize; + args->len = max_blocksize; } - args->first.iov_base = (void *)p; - args->first.iov_len = head->iov_len - hdr; - return 1; + return xdr_stream_subsegment(xdr, &args->payload, args->count); } -int -nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_createargs *args = rqstp->rq_argp; - if (!(p = decode_fh(p, &args->fh)) - || !(p = decode_filename(p, &args->name, &args->len))) - return 0; - - switch (args->createmode = ntohl(*p++)) { + if (!svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len)) + return false; + if (xdr_stream_decode_u32(xdr, &args->createmode) < 0) + return false; + switch (args->createmode) { case NFS3_CREATE_UNCHECKED: case NFS3_CREATE_GUARDED: - p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp)); - break; + return svcxdr_decode_sattr3(rqstp, xdr, &args->attrs); case NFS3_CREATE_EXCLUSIVE: - args->verf = p; - p += 2; + args->verf = xdr_inline_decode(xdr, NFS3_CREATEVERFSIZE); + if (!args->verf) + return false; break; default: - return 0; + return false; } - - return xdr_argsize_check(rqstp, p); + return true; } -int -nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_createargs *args = rqstp->rq_argp; - if (!(p = decode_fh(p, &args->fh)) || - !(p = decode_filename(p, &args->name, &args->len))) - return 0; - p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp)); - - return xdr_argsize_check(rqstp, p); + return svcxdr_decode_diropargs3(xdr, &args->fh, + &args->name, &args->len) && + svcxdr_decode_sattr3(rqstp, xdr, &args->attrs); } -int -nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_symlinkargs *args = rqstp->rq_argp; - char *base = (char *)p; - size_t dlen; - - if (!(p = decode_fh(p, &args->ffh)) || - !(p = decode_filename(p, &args->fname, &args->flen))) - return 0; - p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp)); - - args->tlen = ntohl(*p++); - - args->first.iov_base = p; - args->first.iov_len = rqstp->rq_arg.head[0].iov_len; - args->first.iov_len -= (char *)p - base; + struct kvec *head = rqstp->rq_arg.head; - dlen = args->first.iov_len + rqstp->rq_arg.page_len + - rqstp->rq_arg.tail[0].iov_len; - if (dlen < XDR_QUADLEN(args->tlen) << 2) - return 0; - return 1; + if (!svcxdr_decode_diropargs3(xdr, &args->ffh, &args->fname, &args->flen)) + return false; + if (!svcxdr_decode_sattr3(rqstp, xdr, &args->attrs)) + return false; + if (xdr_stream_decode_u32(xdr, &args->tlen) < 0) + return false; + + /* symlink_data */ + args->first.iov_len = head->iov_len - xdr_stream_pos(xdr); + args->first.iov_base = xdr_inline_decode(xdr, args->tlen); + return args->first.iov_base != NULL; } -int -nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_mknodargs *args = rqstp->rq_argp; - if (!(p = decode_fh(p, &args->fh)) - || !(p = decode_filename(p, &args->name, &args->len))) - return 0; - - args->ftype = ntohl(*p++); - - if (args->ftype == NF3BLK || args->ftype == NF3CHR - || args->ftype == NF3SOCK || args->ftype == NF3FIFO) - p = decode_sattr3(p, &args->attrs, nfsd_user_namespace(rqstp)); - - if (args->ftype == NF3BLK || args->ftype == NF3CHR) { - args->major = ntohl(*p++); - args->minor = ntohl(*p++); + if (!svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len)) + return false; + if (xdr_stream_decode_u32(xdr, &args->ftype) < 0) + return false; + switch (args->ftype) { + case NF3CHR: + case NF3BLK: + return svcxdr_decode_devicedata3(rqstp, xdr, args); + case NF3SOCK: + case NF3FIFO: + return svcxdr_decode_sattr3(rqstp, xdr, &args->attrs); + case NF3REG: + case NF3DIR: + case NF3LNK: + /* Valid XDR but illegal file types */ + break; + default: + return false; } - return xdr_argsize_check(rqstp, p); + return true; } -int -nfs3svc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_renameargs *args = rqstp->rq_argp; - if (!(p = decode_fh(p, &args->ffh)) - || !(p = decode_filename(p, &args->fname, &args->flen)) - || !(p = decode_fh(p, &args->tfh)) - || !(p = decode_filename(p, &args->tname, &args->tlen))) - return 0; - - return xdr_argsize_check(rqstp, p); + return svcxdr_decode_diropargs3(xdr, &args->ffh, + &args->fname, &args->flen) && + svcxdr_decode_diropargs3(xdr, &args->tfh, + &args->tname, &args->tlen); } -int -nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p) -{ - struct nfsd3_readlinkargs *args = rqstp->rq_argp; - - p = decode_fh(p, &args->fh); - if (!p) - return 0; - args->buffer = page_address(*(rqstp->rq_next_page++)); - - return xdr_argsize_check(rqstp, p); -} - -int -nfs3svc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_linkargs *args = rqstp->rq_argp; - if (!(p = decode_fh(p, &args->ffh)) - || !(p = decode_fh(p, &args->tfh)) - || !(p = decode_filename(p, &args->tname, &args->tlen))) - return 0; - - return xdr_argsize_check(rqstp, p); + return svcxdr_decode_nfs_fh3(xdr, &args->ffh) && + svcxdr_decode_diropargs3(xdr, &args->tfh, + &args->tname, &args->tlen); } -int -nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_readdirargs *args = rqstp->rq_argp; - int len; - u32 max_blocksize = svc_max_payload(rqstp); - p = decode_fh(p, &args->fh); - if (!p) - return 0; - p = xdr_decode_hyper(p, &args->cookie); - args->verf = p; p += 2; - args->dircount = ~0; - args->count = ntohl(*p++); - len = args->count = min_t(u32, args->count, max_blocksize); - - while (len > 0) { - struct page *p = *(rqstp->rq_next_page++); - if (!args->buffer) - args->buffer = page_address(p); - len -= PAGE_SIZE; - } - - return xdr_argsize_check(rqstp, p); + if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) + return false; + if (xdr_stream_decode_u64(xdr, &args->cookie) < 0) + return false; + args->verf = xdr_inline_decode(xdr, NFS3_COOKIEVERFSIZE); + if (!args->verf) + return false; + if (xdr_stream_decode_u32(xdr, &args->count) < 0) + return false; + + return true; } -int -nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_readdirargs *args = rqstp->rq_argp; - int len; - u32 max_blocksize = svc_max_payload(rqstp); - - p = decode_fh(p, &args->fh); - if (!p) - return 0; - p = xdr_decode_hyper(p, &args->cookie); - args->verf = p; p += 2; - args->dircount = ntohl(*p++); - args->count = ntohl(*p++); - - len = args->count = min(args->count, max_blocksize); - while (len > 0) { - struct page *p = *(rqstp->rq_next_page++); - if (!args->buffer) - args->buffer = page_address(p); - len -= PAGE_SIZE; - } - - return xdr_argsize_check(rqstp, p); + u32 dircount; + + if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) + return false; + if (xdr_stream_decode_u64(xdr, &args->cookie) < 0) + return false; + args->verf = xdr_inline_decode(xdr, NFS3_COOKIEVERFSIZE); + if (!args->verf) + return false; + /* dircount is ignored */ + if (xdr_stream_decode_u32(xdr, &dircount) < 0) + return false; + if (xdr_stream_decode_u32(xdr, &args->count) < 0) + return false; + + return true; } -int -nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_commitargs *args = rqstp->rq_argp; - p = decode_fh(p, &args->fh); - if (!p) - return 0; - p = xdr_decode_hyper(p, &args->offset); - args->count = ntohl(*p++); - return xdr_argsize_check(rqstp, p); + if (!svcxdr_decode_nfs_fh3(xdr, &args->fh)) + return false; + if (xdr_stream_decode_u64(xdr, &args->offset) < 0) + return false; + if (xdr_stream_decode_u32(xdr, &args->count) < 0) + return false; + + return true; } /* * XDR encode functions */ -/* - * There must be an encoding function for void results so svc_process - * will work properly. - */ -int -nfs3svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p) -{ - return xdr_ressize_check(rqstp, p); -} /* GETATTR */ -int -nfs3svc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_getattrres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_attrstat *resp = rqstp->rq_resp; - if (resp->status == 0) { - lease_get_mtime(d_inode(resp->fh.fh_dentry), - &resp->stat.mtime); - p = encode_fattr3(rqstp, p, &resp->fh, &resp->stat); + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return false; + switch (resp->status) { + case nfs_ok: + lease_get_mtime(d_inode(resp->fh.fh_dentry), &resp->stat.mtime); + if (!svcxdr_encode_fattr3(rqstp, xdr, &resp->fh, &resp->stat)) + return false; + break; } - return xdr_ressize_check(rqstp, p); + + return true; } /* SETATTR, REMOVE, RMDIR */ -int -nfs3svc_encode_wccstat(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_wccstat(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_attrstat *resp = rqstp->rq_resp; - p = encode_wcc_data(rqstp, p, &resp->fh); - return xdr_ressize_check(rqstp, p); + return svcxdr_encode_nfsstat3(xdr, resp->status) && + svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh); } /* LOOKUP */ -int -nfs3svc_encode_diropres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_lookupres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_diropres *resp = rqstp->rq_resp; - if (resp->status == 0) { - p = encode_fh(p, &resp->fh); - p = encode_post_op_attr(rqstp, p, &resp->fh); + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return false; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_nfs_fh3(xdr, &resp->fh)) + return false; + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) + return false; + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->dirfh)) + return false; + break; + default: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->dirfh)) + return false; } - p = encode_post_op_attr(rqstp, p, &resp->dirfh); - return xdr_ressize_check(rqstp, p); + + return true; } /* ACCESS */ -int -nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_accessres *resp = rqstp->rq_resp; - p = encode_post_op_attr(rqstp, p, &resp->fh); - if (resp->status == 0) - *p++ = htonl(resp->access); - return xdr_ressize_check(rqstp, p); + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return false; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) + return false; + if (xdr_stream_encode_u32(xdr, resp->access) < 0) + return false; + break; + default: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) + return false; + } + + return true; } /* READLINK */ -int -nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_readlinkres *resp = rqstp->rq_resp; + struct kvec *head = rqstp->rq_res.head; + + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return false; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) + return false; + if (xdr_stream_encode_u32(xdr, resp->len) < 0) + return false; + xdr_write_pages(xdr, resp->pages, 0, resp->len); + if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0) + return false; + break; + default: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) + return false; + } - p = encode_post_op_attr(rqstp, p, &resp->fh); - if (resp->status == 0) { - *p++ = htonl(resp->len); - xdr_ressize_check(rqstp, p); - rqstp->rq_res.page_len = resp->len; - if (resp->len & 3) { - /* need to pad the tail */ - rqstp->rq_res.tail[0].iov_base = p; - *p = 0; - rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3); - } - return 1; - } else - return xdr_ressize_check(rqstp, p); + return true; } /* READ */ -int -nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_readres *resp = rqstp->rq_resp; + struct kvec *head = rqstp->rq_res.head; + + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return false; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) + return false; + if (xdr_stream_encode_u32(xdr, resp->count) < 0) + return false; + if (xdr_stream_encode_bool(xdr, resp->eof) < 0) + return false; + if (xdr_stream_encode_u32(xdr, resp->count) < 0) + return false; + xdr_write_pages(xdr, resp->pages, rqstp->rq_res.page_base, + resp->count); + if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0) + return false; + break; + default: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) + return false; + } - p = encode_post_op_attr(rqstp, p, &resp->fh); - if (resp->status == 0) { - *p++ = htonl(resp->count); - *p++ = htonl(resp->eof); - *p++ = htonl(resp->count); /* xdr opaque count */ - xdr_ressize_check(rqstp, p); - /* now update rqstp->rq_res to reflect data as well */ - rqstp->rq_res.page_len = resp->count; - if (resp->count & 3) { - /* need to pad the tail */ - rqstp->rq_res.tail[0].iov_base = p; - *p = 0; - rqstp->rq_res.tail[0].iov_len = 4 - (resp->count & 3); - } - return 1; - } else - return xdr_ressize_check(rqstp, p); + return true; } /* WRITE */ -int -nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_writeres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_writeres *resp = rqstp->rq_resp; - p = encode_wcc_data(rqstp, p, &resp->fh); - if (resp->status == 0) { - *p++ = htonl(resp->count); - *p++ = htonl(resp->committed); - *p++ = resp->verf[0]; - *p++ = resp->verf[1]; + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return false; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh)) + return false; + if (xdr_stream_encode_u32(xdr, resp->count) < 0) + return false; + if (xdr_stream_encode_u32(xdr, resp->committed) < 0) + return false; + if (!svcxdr_encode_writeverf3(xdr, resp->verf)) + return false; + break; + default: + if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh)) + return false; } - return xdr_ressize_check(rqstp, p); + + return true; } /* CREATE, MKDIR, SYMLINK, MKNOD */ -int -nfs3svc_encode_createres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_createres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_diropres *resp = rqstp->rq_resp; - if (resp->status == 0) { - *p++ = xdr_one; - p = encode_fh(p, &resp->fh); - p = encode_post_op_attr(rqstp, p, &resp->fh); + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return false; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_post_op_fh3(xdr, &resp->fh)) + return false; + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) + return false; + if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->dirfh)) + return false; + break; + default: + if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->dirfh)) + return false; } - p = encode_wcc_data(rqstp, p, &resp->dirfh); - return xdr_ressize_check(rqstp, p); + + return true; } /* RENAME */ -int -nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_renameres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_renameres *resp = rqstp->rq_resp; - p = encode_wcc_data(rqstp, p, &resp->ffh); - p = encode_wcc_data(rqstp, p, &resp->tfh); - return xdr_ressize_check(rqstp, p); + return svcxdr_encode_nfsstat3(xdr, resp->status) && + svcxdr_encode_wcc_data(rqstp, xdr, &resp->ffh) && + svcxdr_encode_wcc_data(rqstp, xdr, &resp->tfh); } /* LINK */ -int -nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_linkres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_linkres *resp = rqstp->rq_resp; - p = encode_post_op_attr(rqstp, p, &resp->fh); - p = encode_wcc_data(rqstp, p, &resp->tfh); - return xdr_ressize_check(rqstp, p); + return svcxdr_encode_nfsstat3(xdr, resp->status) && + svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh) && + svcxdr_encode_wcc_data(rqstp, xdr, &resp->tfh); } /* READDIR */ -int -nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_readdirres *resp = rqstp->rq_resp; + struct xdr_buf *dirlist = &resp->dirlist; + + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return false; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) + return false; + if (!svcxdr_encode_cookieverf3(xdr, resp->verf)) + return false; + xdr_write_pages(xdr, dirlist->pages, 0, dirlist->len); + /* no more entries */ + if (xdr_stream_encode_item_absent(xdr) < 0) + return false; + if (xdr_stream_encode_bool(xdr, resp->common.err == nfserr_eof) < 0) + return false; + break; + default: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh)) + return false; + } - p = encode_post_op_attr(rqstp, p, &resp->fh); - - if (resp->status == 0) { - /* stupid readdir cookie */ - memcpy(p, resp->verf, 8); p += 2; - xdr_ressize_check(rqstp, p); - if (rqstp->rq_res.head[0].iov_len + (2<<2) > PAGE_SIZE) - return 1; /*No room for trailer */ - rqstp->rq_res.page_len = (resp->count) << 2; - - /* add the 'tail' to the end of the 'head' page - page 0. */ - rqstp->rq_res.tail[0].iov_base = p; - *p++ = 0; /* no more entries */ - *p++ = htonl(resp->common.err == nfserr_eof); - rqstp->rq_res.tail[0].iov_len = 2<<2; - return 1; - } else - return xdr_ressize_check(rqstp, p); -} - -static __be32 * -encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, - int namlen, u64 ino) -{ - *p++ = xdr_one; /* mark entry present */ - p = xdr_encode_hyper(p, ino); /* file id */ - p = xdr_encode_array(p, name, namlen);/* name length & name */ - - cd->offset = p; /* remember pointer */ - p = xdr_encode_hyper(p, NFS_OFFSET_MAX);/* offset of next entry */ - - return p; + return true; } static __be32 @@ -849,9 +990,14 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp, if (isdotent(name, namlen)) { if (namlen == 2) { dchild = dget_parent(dparent); - /* filesystem root - cannot return filehandle for ".." */ + /* + * Don't return filehandle for ".." if we're at + * the filesystem or export root: + */ if (dchild == dparent) goto out; + if (dparent == exp->ex_path.dentry) + goto out; } else dchild = dget(dparent); } else @@ -868,263 +1014,323 @@ out: return rv; } -static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen, u64 ino) -{ - struct svc_fh *fh = &cd->scratch; - __be32 err; - - fh_init(fh, NFS3_FHSIZE); - err = compose_entry_fh(cd, fh, name, namlen, ino); - if (err) { - *p++ = 0; - *p++ = 0; - goto out; - } - p = encode_post_op_attr(cd->rqstp, p, fh); - *p++ = xdr_one; /* yes, a file handle follows */ - p = encode_fh(p, fh); -out: - fh_put(fh); - return p; -} - -/* - * Encode a directory entry. This one works for both normal readdir - * and readdirplus. - * The normal readdir reply requires 2 (fileid) + 1 (stringlen) - * + string + 2 (cookie) + 1 (next) words, i.e. 6 + strlen. - * - * The readdirplus baggage is 1+21 words for post_op_attr, plus the - * file handle. +/** + * nfs3svc_encode_cookie3 - Encode a directory offset cookie + * @resp: readdir result context + * @offset: offset cookie to encode + * + * The buffer space for the offset cookie has already been reserved + * by svcxdr_encode_entry3_common(). */ - -#define NFS3_ENTRY_BAGGAGE (2 + 1 + 2 + 1) -#define NFS3_ENTRYPLUS_BAGGAGE (1 + 21 + 1 + (NFS3_FHSIZE >> 2)) -static int -encode_entry(struct readdir_cd *ccd, const char *name, int namlen, - loff_t offset, u64 ino, unsigned int d_type, int plus) +void nfs3svc_encode_cookie3(struct nfsd3_readdirres *resp, u64 offset) { - struct nfsd3_readdirres *cd = container_of(ccd, struct nfsd3_readdirres, - common); - __be32 *p = cd->buffer; - caddr_t curr_page_addr = NULL; - struct page ** page; - int slen; /* string (name) length */ - int elen; /* estimated entry length in words */ - int num_entry_words = 0; /* actual number of words */ - - if (cd->offset) { - u64 offset64 = offset; - - if (unlikely(cd->offset1)) { - /* we ended up with offset on a page boundary */ - *cd->offset = htonl(offset64 >> 32); - *cd->offset1 = htonl(offset64 & 0xffffffff); - cd->offset1 = NULL; - } else { - xdr_encode_hyper(cd->offset, offset64); - } - cd->offset = NULL; - } - - /* - dprintk("encode_entry(%.*s @%ld%s)\n", - namlen, name, (long) offset, plus? " plus" : ""); - */ - - /* truncate filename if too long */ - namlen = min(namlen, NFS3_MAXNAMLEN); + __be64 cookie = cpu_to_be64(offset); - slen = XDR_QUADLEN(namlen); - elen = slen + NFS3_ENTRY_BAGGAGE - + (plus? NFS3_ENTRYPLUS_BAGGAGE : 0); + if (!resp->cookie_offset) + return; + write_bytes_to_xdr_buf(&resp->dirlist, resp->cookie_offset, &cookie, + sizeof(cookie)); + resp->cookie_offset = 0; +} - if (cd->buflen < elen) { - cd->common.err = nfserr_toosmall; - return -EINVAL; - } +static bool +svcxdr_encode_entry3_common(struct nfsd3_readdirres *resp, const char *name, + int namlen, loff_t offset, u64 ino) +{ + struct xdr_buf *dirlist = &resp->dirlist; + struct xdr_stream *xdr = &resp->xdr; + + if (xdr_stream_encode_item_present(xdr) < 0) + return false; + /* fileid */ + if (xdr_stream_encode_u64(xdr, ino) < 0) + return false; + /* name */ + if (xdr_stream_encode_opaque(xdr, name, min(namlen, NFS3_MAXNAMLEN)) < 0) + return false; + /* cookie */ + resp->cookie_offset = dirlist->len; + if (xdr_stream_encode_u64(xdr, OFFSET_MAX) < 0) + return false; + + return true; +} - /* determine which page in rq_respages[] we are currently filling */ - for (page = cd->rqstp->rq_respages + 1; - page < cd->rqstp->rq_next_page; page++) { - curr_page_addr = page_address(*page); +/** + * nfs3svc_encode_entry3 - encode one NFSv3 READDIR entry + * @data: directory context + * @name: name of the object to be encoded + * @namlen: length of that name, in bytes + * @offset: the offset of the previous entry + * @ino: the fileid of this entry + * @d_type: unused + * + * Return values: + * %0: Entry was successfully encoded. + * %-EINVAL: An encoding problem occured, secondary status code in resp->common.err + * + * On exit, the following fields are updated: + * - resp->xdr + * - resp->common.err + * - resp->cookie_offset + */ +int nfs3svc_encode_entry3(void *data, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type) +{ + struct readdir_cd *ccd = data; + struct nfsd3_readdirres *resp = container_of(ccd, + struct nfsd3_readdirres, + common); + unsigned int starting_length = resp->dirlist.len; - if (((caddr_t)cd->buffer >= curr_page_addr) && - ((caddr_t)cd->buffer < curr_page_addr + PAGE_SIZE)) - break; - } + /* The offset cookie for the previous entry */ + nfs3svc_encode_cookie3(resp, offset); - if ((caddr_t)(cd->buffer + elen) < (curr_page_addr + PAGE_SIZE)) { - /* encode entry in current page */ + if (!svcxdr_encode_entry3_common(resp, name, namlen, offset, ino)) + goto out_toosmall; - p = encode_entry_baggage(cd, p, name, namlen, ino); + xdr_commit_encode(&resp->xdr); + resp->common.err = nfs_ok; + return 0; - if (plus) - p = encode_entryplus_baggage(cd, p, name, namlen, ino); - num_entry_words = p - cd->buffer; - } else if (*(page+1) != NULL) { - /* temporarily encode entry into next page, then move back to - * current and next page in rq_respages[] */ - __be32 *p1, *tmp; - int len1, len2; +out_toosmall: + resp->cookie_offset = 0; + resp->common.err = nfserr_toosmall; + resp->dirlist.len = starting_length; + return -EINVAL; +} - /* grab next page for temporary storage of entry */ - p1 = tmp = page_address(*(page+1)); +static bool +svcxdr_encode_entry3_plus(struct nfsd3_readdirres *resp, const char *name, + int namlen, u64 ino) +{ + struct xdr_stream *xdr = &resp->xdr; + struct svc_fh *fhp = &resp->scratch; + bool result; - p1 = encode_entry_baggage(cd, p1, name, namlen, ino); + result = false; + fh_init(fhp, NFS3_FHSIZE); + if (compose_entry_fh(resp, fhp, name, namlen, ino) != nfs_ok) + goto out_noattrs; - if (plus) - p1 = encode_entryplus_baggage(cd, p1, name, namlen, ino); + if (!svcxdr_encode_post_op_attr(resp->rqstp, xdr, fhp)) + goto out; + if (!svcxdr_encode_post_op_fh3(xdr, fhp)) + goto out; + result = true; - /* determine entry word length and lengths to go in pages */ - num_entry_words = p1 - tmp; - len1 = curr_page_addr + PAGE_SIZE - (caddr_t)cd->buffer; - if ((num_entry_words << 2) < len1) { - /* the actual number of words in the entry is less - * than elen and can still fit in the current page - */ - memmove(p, tmp, num_entry_words << 2); - p += num_entry_words; - - /* update offset */ - cd->offset = cd->buffer + (cd->offset - tmp); - } else { - unsigned int offset_r = (cd->offset - tmp) << 2; - - /* update pointer to offset location. - * This is a 64bit quantity, so we need to - * deal with 3 cases: - * - entirely in first page - * - entirely in second page - * - 4 bytes in each page - */ - if (offset_r + 8 <= len1) { - cd->offset = p + (cd->offset - tmp); - } else if (offset_r >= len1) { - cd->offset -= len1 >> 2; - } else { - /* sitting on the fence */ - BUG_ON(offset_r != len1 - 4); - cd->offset = p + (cd->offset - tmp); - cd->offset1 = tmp; - } - - len2 = (num_entry_words << 2) - len1; - - /* move from temp page to current and next pages */ - memmove(p, tmp, len1); - memmove(tmp, (caddr_t)tmp+len1, len2); - - p = tmp + (len2 >> 2); - } - } - else { - cd->common.err = nfserr_toosmall; - return -EINVAL; - } +out: + fh_put(fhp); + return result; + +out_noattrs: + if (xdr_stream_encode_item_absent(xdr) < 0) + return false; + if (xdr_stream_encode_item_absent(xdr) < 0) + return false; + return true; +} - cd->buflen -= num_entry_words; - cd->buffer = p; - cd->common.err = nfs_ok; +/** + * nfs3svc_encode_entryplus3 - encode one NFSv3 READDIRPLUS entry + * @data: directory context + * @name: name of the object to be encoded + * @namlen: length of that name, in bytes + * @offset: the offset of the previous entry + * @ino: the fileid of this entry + * @d_type: unused + * + * Return values: + * %0: Entry was successfully encoded. + * %-EINVAL: An encoding problem occured, secondary status code in resp->common.err + * + * On exit, the following fields are updated: + * - resp->xdr + * - resp->common.err + * - resp->cookie_offset + */ +int nfs3svc_encode_entryplus3(void *data, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type) +{ + struct readdir_cd *ccd = data; + struct nfsd3_readdirres *resp = container_of(ccd, + struct nfsd3_readdirres, + common); + unsigned int starting_length = resp->dirlist.len; + + /* The offset cookie for the previous entry */ + nfs3svc_encode_cookie3(resp, offset); + + if (!svcxdr_encode_entry3_common(resp, name, namlen, offset, ino)) + goto out_toosmall; + if (!svcxdr_encode_entry3_plus(resp, name, namlen, ino)) + goto out_toosmall; + + xdr_commit_encode(&resp->xdr); + resp->common.err = nfs_ok; return 0; +out_toosmall: + resp->cookie_offset = 0; + resp->common.err = nfserr_toosmall; + resp->dirlist.len = starting_length; + return -EINVAL; } -int -nfs3svc_encode_entry(void *cd, const char *name, - int namlen, loff_t offset, u64 ino, unsigned int d_type) +static bool +svcxdr_encode_fsstat3resok(struct xdr_stream *xdr, + const struct nfsd3_fsstatres *resp) { - return encode_entry(cd, name, namlen, offset, ino, d_type, 0); -} + const struct kstatfs *s = &resp->stats; + u64 bs = s->f_bsize; + __be32 *p; -int -nfs3svc_encode_entry_plus(void *cd, const char *name, - int namlen, loff_t offset, u64 ino, - unsigned int d_type) -{ - return encode_entry(cd, name, namlen, offset, ino, d_type, 1); + p = xdr_reserve_space(xdr, XDR_UNIT * 13); + if (!p) + return false; + p = xdr_encode_hyper(p, bs * s->f_blocks); /* total bytes */ + p = xdr_encode_hyper(p, bs * s->f_bfree); /* free bytes */ + p = xdr_encode_hyper(p, bs * s->f_bavail); /* user available bytes */ + p = xdr_encode_hyper(p, s->f_files); /* total inodes */ + p = xdr_encode_hyper(p, s->f_ffree); /* free inodes */ + p = xdr_encode_hyper(p, s->f_ffree); /* user available inodes */ + *p = cpu_to_be32(resp->invarsec); /* mean unchanged time */ + + return true; } /* FSSTAT */ -int -nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_fsstatres *resp = rqstp->rq_resp; - struct kstatfs *s = &resp->stats; - u64 bs = s->f_bsize; - - *p++ = xdr_zero; /* no post_op_attr */ - - if (resp->status == 0) { - p = xdr_encode_hyper(p, bs * s->f_blocks); /* total bytes */ - p = xdr_encode_hyper(p, bs * s->f_bfree); /* free bytes */ - p = xdr_encode_hyper(p, bs * s->f_bavail); /* user available bytes */ - p = xdr_encode_hyper(p, s->f_files); /* total inodes */ - p = xdr_encode_hyper(p, s->f_ffree); /* free inodes */ - p = xdr_encode_hyper(p, s->f_ffree); /* user available inodes */ - *p++ = htonl(resp->invarsec); /* mean unchanged time */ + + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return false; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) + return false; + if (!svcxdr_encode_fsstat3resok(xdr, resp)) + return false; + break; + default: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) + return false; } - return xdr_ressize_check(rqstp, p); + + return true; +} + +static bool +svcxdr_encode_fsinfo3resok(struct xdr_stream *xdr, + const struct nfsd3_fsinfores *resp) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, XDR_UNIT * 12); + if (!p) + return false; + *p++ = cpu_to_be32(resp->f_rtmax); + *p++ = cpu_to_be32(resp->f_rtpref); + *p++ = cpu_to_be32(resp->f_rtmult); + *p++ = cpu_to_be32(resp->f_wtmax); + *p++ = cpu_to_be32(resp->f_wtpref); + *p++ = cpu_to_be32(resp->f_wtmult); + *p++ = cpu_to_be32(resp->f_dtpref); + p = xdr_encode_hyper(p, resp->f_maxfilesize); + p = encode_nfstime3(p, &nfs3svc_time_delta); + *p = cpu_to_be32(resp->f_properties); + + return true; } /* FSINFO */ -int -nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_fsinfores *resp = rqstp->rq_resp; - *p++ = xdr_zero; /* no post_op_attr */ - - if (resp->status == 0) { - *p++ = htonl(resp->f_rtmax); - *p++ = htonl(resp->f_rtpref); - *p++ = htonl(resp->f_rtmult); - *p++ = htonl(resp->f_wtmax); - *p++ = htonl(resp->f_wtpref); - *p++ = htonl(resp->f_wtmult); - *p++ = htonl(resp->f_dtpref); - p = xdr_encode_hyper(p, resp->f_maxfilesize); - *p++ = xdr_one; - *p++ = xdr_zero; - *p++ = htonl(resp->f_properties); + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return false; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) + return false; + if (!svcxdr_encode_fsinfo3resok(xdr, resp)) + return false; + break; + default: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) + return false; } - return xdr_ressize_check(rqstp, p); + return true; +} + +static bool +svcxdr_encode_pathconf3resok(struct xdr_stream *xdr, + const struct nfsd3_pathconfres *resp) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, XDR_UNIT * 6); + if (!p) + return false; + *p++ = cpu_to_be32(resp->p_link_max); + *p++ = cpu_to_be32(resp->p_name_max); + p = xdr_encode_bool(p, resp->p_no_trunc); + p = xdr_encode_bool(p, resp->p_chown_restricted); + p = xdr_encode_bool(p, resp->p_case_insensitive); + xdr_encode_bool(p, resp->p_case_preserving); + + return true; } /* PATHCONF */ -int -nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_pathconfres *resp = rqstp->rq_resp; - *p++ = xdr_zero; /* no post_op_attr */ - - if (resp->status == 0) { - *p++ = htonl(resp->p_link_max); - *p++ = htonl(resp->p_name_max); - *p++ = htonl(resp->p_no_trunc); - *p++ = htonl(resp->p_chown_restricted); - *p++ = htonl(resp->p_case_insensitive); - *p++ = htonl(resp->p_case_preserving); + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return false; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) + return false; + if (!svcxdr_encode_pathconf3resok(xdr, resp)) + return false; + break; + default: + if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh)) + return false; } - return xdr_ressize_check(rqstp, p); + return true; } /* COMMIT */ -int -nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs3svc_encode_commitres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd3_commitres *resp = rqstp->rq_resp; - p = encode_wcc_data(rqstp, p, &resp->fh); - /* Write verifier */ - if (resp->status == 0) { - *p++ = resp->verf[0]; - *p++ = resp->verf[1]; + if (!svcxdr_encode_nfsstat3(xdr, resp->status)) + return false; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh)) + return false; + if (!svcxdr_encode_writeverf3(xdr, resp->verf)) + return false; + break; + default: + if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh)) + return false; } - return xdr_ressize_check(rqstp, p); + + return true; } /* diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index 71292a0d6f09..bb8e2f6d7d03 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -751,57 +751,26 @@ out_estate: return ret; } -__be32 -nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct nfs4_acl *acl) +__be32 nfsd4_acl_to_attr(enum nfs_ftype4 type, struct nfs4_acl *acl, + struct nfsd_attrs *attr) { - __be32 error; int host_error; - struct dentry *dentry; - struct inode *inode; - struct posix_acl *pacl = NULL, *dpacl = NULL; unsigned int flags = 0; - /* Get inode */ - error = fh_verify(rqstp, fhp, 0, NFSD_MAY_SATTR); - if (error) - return error; - - dentry = fhp->fh_dentry; - inode = d_inode(dentry); + if (!acl) + return nfs_ok; - if (S_ISDIR(inode->i_mode)) + if (type == NF4DIR) flags = NFS4_ACL_DIR; - host_error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags); + host_error = nfs4_acl_nfsv4_to_posix(acl, &attr->na_pacl, + &attr->na_dpacl, flags); if (host_error == -EINVAL) return nfserr_attrnotsupp; - if (host_error < 0) - goto out_nfserr; - - fh_lock(fhp); - - host_error = set_posix_acl(inode, ACL_TYPE_ACCESS, pacl); - if (host_error < 0) - goto out_drop_lock; - - if (S_ISDIR(inode->i_mode)) { - host_error = set_posix_acl(inode, ACL_TYPE_DEFAULT, dpacl); - } - -out_drop_lock: - fh_unlock(fhp); - - posix_acl_release(pacl); - posix_acl_release(dpacl); -out_nfserr: - if (host_error == -EOPNOTSUPP) - return nfserr_attrnotsupp; else return nfserrno(host_error); } - static short ace2type(struct nfs4_ace *ace) { diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index c3b11a715082..f0e69edf5f0f 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -38,6 +38,7 @@ #include "nfsd.h" #include "state.h" #include "netns.h" +#include "trace.h" #include "xdr4cb.h" #include "xdr4.h" @@ -120,7 +121,7 @@ static void encode_nfs_fh4(struct xdr_stream *xdr, const struct knfsd_fh *fh) BUG_ON(length > NFS4_FHSIZE); p = xdr_reserve_space(xdr, 4 + length); - xdr_encode_opaque(p, &fh->fh_base, length); + xdr_encode_opaque(p, &fh->fh_raw, length); } /* @@ -678,7 +679,7 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp, * case NFS4_OK: * write_response4 coa_resok4; * default: - * length4 coa_bytes_copied; + * length4 coa_bytes_copied; * }; * struct CB_OFFLOAD4args { * nfs_fh4 coa_fh; @@ -687,21 +688,22 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp, * }; */ static void encode_offload_info4(struct xdr_stream *xdr, - __be32 nfserr, - const struct nfsd4_copy *cp) + const struct nfsd4_cb_offload *cbo) { __be32 *p; p = xdr_reserve_space(xdr, 4); - *p++ = nfserr; - if (!nfserr) { + *p = cbo->co_nfserr; + switch (cbo->co_nfserr) { + case nfs_ok: p = xdr_reserve_space(xdr, 4 + 8 + 4 + NFS4_VERIFIER_SIZE); p = xdr_encode_empty_array(p); - p = xdr_encode_hyper(p, cp->cp_res.wr_bytes_written); - *p++ = cpu_to_be32(cp->cp_res.wr_stable_how); - p = xdr_encode_opaque_fixed(p, cp->cp_res.wr_verifier.data, + p = xdr_encode_hyper(p, cbo->co_res.wr_bytes_written); + *p++ = cpu_to_be32(cbo->co_res.wr_stable_how); + p = xdr_encode_opaque_fixed(p, cbo->co_res.wr_verifier.data, NFS4_VERIFIER_SIZE); - } else { + break; + default: p = xdr_reserve_space(xdr, 8); /* We always return success if bytes were written */ p = xdr_encode_hyper(p, 0); @@ -709,18 +711,16 @@ static void encode_offload_info4(struct xdr_stream *xdr, } static void encode_cb_offload4args(struct xdr_stream *xdr, - __be32 nfserr, - const struct knfsd_fh *fh, - const struct nfsd4_copy *cp, + const struct nfsd4_cb_offload *cbo, struct nfs4_cb_compound_hdr *hdr) { __be32 *p; p = xdr_reserve_space(xdr, 4); - *p++ = cpu_to_be32(OP_CB_OFFLOAD); - encode_nfs_fh4(xdr, fh); - encode_stateid4(xdr, &cp->cp_res.cb_stateid); - encode_offload_info4(xdr, nfserr, cp); + *p = cpu_to_be32(OP_CB_OFFLOAD); + encode_nfs_fh4(xdr, &cbo->co_fh); + encode_stateid4(xdr, &cbo->co_res.cb_stateid); + encode_offload_info4(xdr, cbo); hdr->nops++; } @@ -730,8 +730,8 @@ static void nfs4_xdr_enc_cb_offload(struct rpc_rqst *req, const void *data) { const struct nfsd4_callback *cb = data; - const struct nfsd4_copy *cp = - container_of(cb, struct nfsd4_copy, cp_cb); + const struct nfsd4_cb_offload *cbo = + container_of(cb, struct nfsd4_cb_offload, co_cb); struct nfs4_cb_compound_hdr hdr = { .ident = 0, .minorversion = cb->cb_clp->cl_minorversion, @@ -739,7 +739,7 @@ static void nfs4_xdr_enc_cb_offload(struct rpc_rqst *req, encode_cb_compound4args(xdr, &hdr); encode_cb_sequence4args(xdr, cb, &hdr); - encode_cb_offload4args(xdr, cp->nfserr, &cp->fh, cp, &hdr); + encode_cb_offload4args(xdr, cbo, &hdr); encode_cb_nops(&hdr); } @@ -904,8 +904,10 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c if (clp->cl_minorversion == 0) { if (!clp->cl_cred.cr_principal && - (clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5)) + (clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5)) { + trace_nfsd_cb_setup_err(clp, -EINVAL); return -EINVAL; + } args.client_name = clp->cl_cred.cr_principal; args.prognumber = conn->cb_prog; args.protocol = XPRT_TRANSPORT_TCP; @@ -925,40 +927,44 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c /* Create RPC client */ client = rpc_create(&args); if (IS_ERR(client)) { - dprintk("NFSD: couldn't create callback client: %ld\n", - PTR_ERR(client)); + trace_nfsd_cb_setup_err(clp, PTR_ERR(client)); return PTR_ERR(client); } cred = get_backchannel_cred(clp, client, ses); if (!cred) { + trace_nfsd_cb_setup_err(clp, -ENOMEM); rpc_shutdown_client(client); return -ENOMEM; } clp->cl_cb_client = client; clp->cl_cb_cred = cred; + rcu_read_lock(); + trace_nfsd_cb_setup(clp, rpc_peeraddr2str(client, RPC_DISPLAY_NETID), + args.authflavor); + rcu_read_unlock(); return 0; } -static void warn_no_callback_path(struct nfs4_client *clp, int reason) +static void nfsd4_mark_cb_state(struct nfs4_client *clp, int newstate) { - dprintk("NFSD: warning: no callback path to client %.*s: error %d\n", - (int)clp->cl_name.len, clp->cl_name.data, reason); + if (clp->cl_cb_state != newstate) { + clp->cl_cb_state = newstate; + trace_nfsd_cb_state(clp); + } } static void nfsd4_mark_cb_down(struct nfs4_client *clp, int reason) { if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags)) return; - clp->cl_cb_state = NFSD4_CB_DOWN; - warn_no_callback_path(clp, reason); + nfsd4_mark_cb_state(clp, NFSD4_CB_DOWN); } static void nfsd4_mark_cb_fault(struct nfs4_client *clp, int reason) { if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags)) return; - clp->cl_cb_state = NFSD4_CB_FAULT; - warn_no_callback_path(clp, reason); + nfsd4_mark_cb_state(clp, NFSD4_CB_FAULT); } static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata) @@ -968,7 +974,7 @@ static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata) if (task->tk_status) nfsd4_mark_cb_down(clp, task->tk_status); else - clp->cl_cb_state = NFSD4_CB_UP; + nfsd4_mark_cb_state(clp, NFSD4_CB_UP); } static void nfsd4_cb_probe_release(void *calldata) @@ -992,7 +998,8 @@ static const struct rpc_call_ops nfsd4_cb_probe_ops = { */ void nfsd4_probe_callback(struct nfs4_client *clp) { - clp->cl_cb_state = NFSD4_CB_UNKNOWN; + trace_nfsd_cb_probe(clp); + nfsd4_mark_cb_state(clp, NFSD4_CB_UNKNOWN); set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags); nfsd4_run_cb(&clp->cl_cb_null); } @@ -1005,7 +1012,7 @@ void nfsd4_probe_callback_sync(struct nfs4_client *clp) void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn) { - clp->cl_cb_state = NFSD4_CB_UNKNOWN; + nfsd4_mark_cb_state(clp, NFSD4_CB_UNKNOWN); spin_lock(&clp->cl_lock); memcpy(&clp->cl_cb_conn, conn, sizeof(struct nfs4_cb_conn)); spin_unlock(&clp->cl_lock); @@ -1114,7 +1121,7 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback break; case -ESERVERFAULT: ++session->se_cb_seq_nr; - /* Fall through */ + fallthrough; case 1: case -NFS4ERR_BADSESSION: nfsd4_mark_cb_fault(cb->cb_clp, cb->cb_seq_status); @@ -1165,9 +1172,6 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata) struct nfsd4_callback *cb = calldata; struct nfs4_client *clp = cb->cb_clp; - dprintk("%s: minorversion=%d\n", __func__, - clp->cl_minorversion); - if (!nfsd4_cb_sequence_done(task, cb)) return; @@ -1185,6 +1189,7 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata) switch (task->tk_status) { case -EIO: case -ETIMEDOUT: + case -EACCES: nfsd4_mark_cb_down(clp, task->tk_status); } break; @@ -1226,6 +1231,9 @@ void nfsd4_destroy_callback_queue(void) /* must be called under the state lock */ void nfsd4_shutdown_callback(struct nfs4_client *clp) { + if (clp->cl_cb_state != NFSD4_CB_UNKNOWN) + trace_nfsd_cb_shutdown(clp); + set_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags); /* * Note this won't actually result in a null callback; @@ -1301,6 +1309,8 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) err = setup_callback_client(clp, &conn, ses); if (err) { nfsd4_mark_cb_down(clp, err); + if (c) + svc_xprt_put(c->cn_xprt); return; } } @@ -1312,6 +1322,7 @@ nfsd4_run_cb_work(struct work_struct *work) container_of(work, struct nfsd4_callback, cb_work); struct nfs4_client *clp = cb->cb_clp; struct rpc_clnt *clnt; + int flags; if (cb->cb_need_restart) { cb->cb_need_restart = false; @@ -1334,13 +1345,14 @@ nfsd4_run_cb_work(struct work_struct *work) * Don't send probe messages for 4.1 or later. */ if (!cb->cb_ops && clp->cl_minorversion) { - clp->cl_cb_state = NFSD4_CB_UP; + nfsd4_mark_cb_state(clp, NFSD4_CB_UP); nfsd41_destroy_cb(cb); return; } cb->cb_msg.rpc_cred = clp->cl_cb_cred; - rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN, + flags = clp->cl_minorversion ? RPC_TASK_NOCONNECT : RPC_TASK_SOFTCONN; + rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | flags, cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb); } @@ -1359,11 +1371,21 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, cb->cb_holds_slot = false; } -void nfsd4_run_cb(struct nfsd4_callback *cb) +/** + * nfsd4_run_cb - queue up a callback job to run + * @cb: callback to queue + * + * Kick off a callback to do its thing. Returns false if it was already + * on a queue, true otherwise. + */ +bool nfsd4_run_cb(struct nfsd4_callback *cb) { struct nfs4_client *clp = cb->cb_clp; + bool queued; nfsd41_cb_inflight_begin(clp); - if (!nfsd4_queue_cb(cb)) + queued = nfsd4_queue_cb(cb); + if (!queued) nfsd41_cb_inflight_end(clp); + return queued; } diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index d1f285245af8..e70a1a2999b7 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -82,8 +82,8 @@ ent_init(struct cache_head *cnew, struct cache_head *citm) new->id = itm->id; new->type = itm->type; - strlcpy(new->name, itm->name, sizeof(new->name)); - strlcpy(new->authname, itm->authname, sizeof(new->authname)); + strscpy(new->name, itm->name, sizeof(new->name)); + strscpy(new->authname, itm->authname, sizeof(new->authname)); } static void @@ -122,6 +122,12 @@ idtoname_hash(struct ent *ent) return hash; } +static int +idtoname_upcall(struct cache_detail *cd, struct cache_head *h) +{ + return sunrpc_cache_pipe_upcall_timeout(cd, h); +} + static void idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, int *blen) @@ -162,7 +168,7 @@ idtoname_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h) ent->id); if (test_bit(CACHE_VALID, &h->flags)) seq_printf(m, " %s", ent->name); - seq_printf(m, "\n"); + seq_putc(m, '\n'); return 0; } @@ -184,6 +190,7 @@ static const struct cache_detail idtoname_cache_template = { .hash_size = ENT_HASHMAX, .name = "nfs4.idtoname", .cache_put = ent_put, + .cache_upcall = idtoname_upcall, .cache_request = idtoname_request, .cache_parse = idtoname_parse, .cache_show = idtoname_show, @@ -295,6 +302,12 @@ nametoid_hash(struct ent *ent) return hash_str(ent->name, ENT_HASHBITS); } +static int +nametoid_upcall(struct cache_detail *cd, struct cache_head *h) +{ + return sunrpc_cache_pipe_upcall_timeout(cd, h); +} + static void nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, int *blen) @@ -333,7 +346,7 @@ nametoid_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h) ent->name); if (test_bit(CACHE_VALID, &h->flags)) seq_printf(m, " %u", ent->id); - seq_printf(m, "\n"); + seq_putc(m, '\n'); return 0; } @@ -347,6 +360,7 @@ static const struct cache_detail nametoid_cache_template = { .hash_size = ENT_HASHMAX, .name = "nfs4.nametoid", .cache_put = ent_put, + .cache_upcall = nametoid_upcall, .cache_request = nametoid_request, .cache_parse = nametoid_parse, .cache_show = nametoid_show, @@ -534,7 +548,7 @@ idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen return nfserr_badowner; memcpy(key.name, name, namelen); key.name[namelen] = '\0'; - strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); + strscpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); ret = idmap_lookup(rqstp, nametoid_lookup, &key, nn->nametoid_cache, &item); if (ret == -ENOENT) return nfserr_badowner; @@ -570,7 +584,7 @@ static __be32 idmap_id_to_name(struct xdr_stream *xdr, int ret; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); + strscpy(key.authname, rqst_authname(rqstp), sizeof(key.authname)); ret = idmap_lookup(rqstp, idtoname_lookup, &key, nn->idtoname_cache, &item); if (ret == -ENOENT) return encode_ascii_id(xdr, id); diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index e12409eca7cc..3564d1c6f610 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -145,8 +145,9 @@ void nfsd4_setup_layout_type(struct svc_export *exp) #ifdef CONFIG_NFSD_SCSILAYOUT if (sb->s_export_op->map_blocks && sb->s_export_op->commit_blocks && - sb->s_bdev && sb->s_bdev->bd_disk->fops->pr_ops && - blk_queue_scsi_passthrough(sb->s_bdev->bd_disk->queue)) + sb->s_bdev && + sb->s_bdev->bd_disk->fops->pr_ops && + sb->s_bdev->bd_disk->fops->get_unique_id) exp->ex_layout_types |= 1 << LAYOUT_SCSI; #endif } @@ -421,7 +422,7 @@ nfsd4_insert_layout(struct nfsd4_layoutget *lgp, struct nfs4_layout_stateid *ls) new = kmem_cache_alloc(nfs4_layout_cache, GFP_KERNEL); if (!new) return nfserr_jukebox; - memcpy(&new->lo_seg, seg, sizeof(lp->lo_seg)); + memcpy(&new->lo_seg, seg, sizeof(new->lo_seg)); new->lo_state = ls; spin_lock(&fp->fi_lock); @@ -657,7 +658,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) ktime_t now, cutoff; const struct nfsd4_layout_ops *ops; - + trace_nfsd_cb_layout_done(&ls->ls_stid.sc_stateid, task); switch (task->tk_status) { case 0: case -NFS4ERR_DELAY: @@ -681,7 +682,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) rpc_delay(task, HZ/100); /* 10 mili-seconds */ return 0; } - /* Fallthrough */ + fallthrough; default: /* * Unknown error or non-responding client, we'll need to fence. diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 0e75f7fb5fec..8beb2bc4c328 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -37,7 +37,10 @@ #include <linux/falloc.h> #include <linux/slab.h> #include <linux/kthread.h> +#include <linux/namei.h> + #include <linux/sunrpc/addr.h> +#include <linux/nfs_ssc.h> #include "idmap.h" #include "cache.h" @@ -49,34 +52,16 @@ #include "pnfs.h" #include "trace.h" -#ifdef CONFIG_NFSD_V4_SECURITY_LABEL -#include <linux/security.h> - -static inline void -nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval) -{ - struct inode *inode = d_inode(resfh->fh_dentry); - int status; - - inode_lock(inode); - status = security_inode_setsecctx(resfh->fh_dentry, - label->data, label->len); - inode_unlock(inode); - - if (status) - /* - * XXX: We should really fail the whole open, but we may - * already have created a new file, so it may be too - * late. For now this seems the least of evils: - */ - bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL; +static bool inter_copy_offload_enable; +module_param(inter_copy_offload_enable, bool, 0644); +MODULE_PARM_DESC(inter_copy_offload_enable, + "Enable inter server to server copy offload. Default: false"); - return; -} -#else -static inline void -nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval) -{ } +#ifdef CONFIG_NFSD_V4_2_INTER_SSC +static int nfsd4_ssc_umount_timeout = 900000; /* default to 15 mins */ +module_param(nfsd4_ssc_umount_timeout, int, 0644); +MODULE_PARM_DESC(nfsd4_ssc_umount_timeout, + "idle msecs before unmount export from source server"); #endif #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -143,26 +128,6 @@ is_create_with_attrs(struct nfsd4_open *open) || open->op_createmode == NFS4_CREATE_EXCLUSIVE4_1); } -/* - * if error occurs when setting the acl, just clear the acl bit - * in the returned attr bitmap. - */ -static void -do_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct nfs4_acl *acl, u32 *bmval) -{ - __be32 status; - - status = nfsd4_set_nfs4_acl(rqstp, fhp, acl); - if (status) - /* - * We should probably fail the whole open at this point, - * but we've already created the file, so it's too late; - * So this seems the least of evils: - */ - bmval[0] &= ~FATTR4_WORD0_ACL; -} - static inline void fh_dup2(struct svc_fh *dst, struct svc_fh *src) { @@ -176,7 +141,6 @@ fh_dup2(struct svc_fh *dst, struct svc_fh *src) static __be32 do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open, int accmode) { - __be32 status; if (open->op_truncate && !(open->op_share_access & NFS4_SHARE_ACCESS_WRITE)) @@ -191,9 +155,7 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs if (open->op_share_deny & NFS4_SHARE_DENY_READ) accmode |= NFSD_MAY_WRITE; - status = fh_verify(rqstp, current_fh, S_IFREG, accmode); - - return status; + return fh_verify(rqstp, current_fh, S_IFREG, accmode); } static __be32 nfsd_check_obj_isreg(struct svc_fh *fh) @@ -222,6 +184,202 @@ static void nfsd4_set_open_owner_reply_cache(struct nfsd4_compound_state *cstate &resfh->fh_handle); } +static inline bool nfsd4_create_is_exclusive(int createmode) +{ + return createmode == NFS4_CREATE_EXCLUSIVE || + createmode == NFS4_CREATE_EXCLUSIVE4_1; +} + +static __be32 +nfsd4_vfs_create(struct svc_fh *fhp, struct dentry *child, + struct nfsd4_open *open) +{ + struct file *filp; + struct path path; + int oflags; + + oflags = O_CREAT | O_LARGEFILE; + switch (open->op_share_access & NFS4_SHARE_ACCESS_BOTH) { + case NFS4_SHARE_ACCESS_WRITE: + oflags |= O_WRONLY; + break; + case NFS4_SHARE_ACCESS_BOTH: + oflags |= O_RDWR; + break; + default: + oflags |= O_RDONLY; + } + + path.mnt = fhp->fh_export->ex_path.mnt; + path.dentry = child; + filp = dentry_create(&path, oflags, open->op_iattr.ia_mode, + current_cred()); + if (IS_ERR(filp)) + return nfserrno(PTR_ERR(filp)); + + open->op_filp = filp; + return nfs_ok; +} + +/* + * Implement NFSv4's unchecked, guarded, and exclusive create + * semantics for regular files. Open state for this new file is + * subsequently fabricated in nfsd4_process_open2(). + * + * Upon return, caller must release @fhp and @resfhp. + */ +static __be32 +nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct svc_fh *resfhp, struct nfsd4_open *open) +{ + struct iattr *iap = &open->op_iattr; + struct nfsd_attrs attrs = { + .na_iattr = iap, + .na_seclabel = &open->op_label, + }; + struct dentry *parent, *child; + __u32 v_mtime, v_atime; + struct inode *inode; + __be32 status; + int host_err; + + if (isdotent(open->op_fname, open->op_fnamelen)) + return nfserr_exist; + if (!(iap->ia_valid & ATTR_MODE)) + iap->ia_mode = 0; + + status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC); + if (status != nfs_ok) + return status; + parent = fhp->fh_dentry; + inode = d_inode(parent); + + host_err = fh_want_write(fhp); + if (host_err) + return nfserrno(host_err); + + if (is_create_with_attrs(open)) + nfsd4_acl_to_attr(NF4REG, open->op_acl, &attrs); + + inode_lock_nested(inode, I_MUTEX_PARENT); + + child = lookup_one_len(open->op_fname, parent, open->op_fnamelen); + if (IS_ERR(child)) { + status = nfserrno(PTR_ERR(child)); + goto out; + } + + if (d_really_is_negative(child)) { + status = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE); + if (status != nfs_ok) + goto out; + } + + status = fh_compose(resfhp, fhp->fh_export, child, fhp); + if (status != nfs_ok) + goto out; + + v_mtime = 0; + v_atime = 0; + if (nfsd4_create_is_exclusive(open->op_createmode)) { + u32 *verifier = (u32 *)open->op_verf.data; + + /* + * Solaris 7 gets confused (bugid 4218508) if these have + * the high bit set, as do xfs filesystems without the + * "bigtime" feature. So just clear the high bits. If this + * is ever changed to use different attrs for storing the + * verifier, then do_open_lookup() will also need to be + * fixed accordingly. + */ + v_mtime = verifier[0] & 0x7fffffff; + v_atime = verifier[1] & 0x7fffffff; + } + + if (d_really_is_positive(child)) { + status = nfs_ok; + + /* NFSv4 protocol requires change attributes even though + * no change happened. + */ + fh_fill_both_attrs(fhp); + + switch (open->op_createmode) { + case NFS4_CREATE_UNCHECKED: + if (!d_is_reg(child)) + break; + + /* + * In NFSv4, we don't want to truncate the file + * now. This would be wrong if the OPEN fails for + * some other reason. Furthermore, if the size is + * nonzero, we should ignore it according to spec! + */ + open->op_truncate = (iap->ia_valid & ATTR_SIZE) && + !iap->ia_size; + break; + case NFS4_CREATE_GUARDED: + status = nfserr_exist; + break; + case NFS4_CREATE_EXCLUSIVE: + if (d_inode(child)->i_mtime.tv_sec == v_mtime && + d_inode(child)->i_atime.tv_sec == v_atime && + d_inode(child)->i_size == 0) { + open->op_created = true; + break; /* subtle */ + } + status = nfserr_exist; + break; + case NFS4_CREATE_EXCLUSIVE4_1: + if (d_inode(child)->i_mtime.tv_sec == v_mtime && + d_inode(child)->i_atime.tv_sec == v_atime && + d_inode(child)->i_size == 0) { + open->op_created = true; + goto set_attr; /* subtle */ + } + status = nfserr_exist; + } + goto out; + } + + if (!IS_POSIXACL(inode)) + iap->ia_mode &= ~current_umask(); + + fh_fill_pre_attrs(fhp); + status = nfsd4_vfs_create(fhp, child, open); + if (status != nfs_ok) + goto out; + open->op_created = true; + fh_fill_post_attrs(fhp); + + /* A newly created file already has a file size of zero. */ + if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0)) + iap->ia_valid &= ~ATTR_SIZE; + if (nfsd4_create_is_exclusive(open->op_createmode)) { + iap->ia_valid = ATTR_MTIME | ATTR_ATIME | + ATTR_MTIME_SET|ATTR_ATIME_SET; + iap->ia_mtime.tv_sec = v_mtime; + iap->ia_atime.tv_sec = v_atime; + iap->ia_mtime.tv_nsec = 0; + iap->ia_atime.tv_nsec = 0; + } + +set_attr: + status = nfsd_create_setattr(rqstp, fhp, resfhp, &attrs); + + if (attrs.na_labelerr) + open->op_bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL; + if (attrs.na_aclerr) + open->op_bmval[0] &= ~FATTR4_WORD0_ACL; +out: + inode_unlock(inode); + nfsd_attrs_free(&attrs); + if (child && !IS_ERR(child)) + dput(child); + fh_drop_write(fhp); + return status; +} + static __be32 do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open, struct svc_fh **resfh) { @@ -251,47 +409,33 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru * yes | yes | GUARDED4 | GUARDED4 */ - /* - * Note: create modes (UNCHECKED,GUARDED...) are the same - * in NFSv4 as in v3 except EXCLUSIVE4_1. - */ current->fs->umask = open->op_umask; - status = do_nfsd_create(rqstp, current_fh, open->op_fname.data, - open->op_fname.len, &open->op_iattr, - *resfh, open->op_createmode, - (u32 *)open->op_verf.data, - &open->op_truncate, &open->op_created); + status = nfsd4_create_file(rqstp, current_fh, *resfh, open); current->fs->umask = 0; - if (!status && open->op_label.len) - nfsd4_security_inode_setsecctx(*resfh, &open->op_label, open->op_bmval); - /* * Following rfc 3530 14.2.16, and rfc 5661 18.16.4 * use the returned bitmask to indicate which attributes * we used to store the verifier: */ - if (nfsd_create_is_exclusive(open->op_createmode) && status == 0) + if (nfsd4_create_is_exclusive(open->op_createmode) && status == 0) open->op_bmval[1] |= (FATTR4_WORD1_TIME_ACCESS | FATTR4_WORD1_TIME_MODIFY); - } else - /* - * Note this may exit with the parent still locked. - * We will hold the lock until nfsd4_open's final - * lookup, to prevent renames or unlinks until we've had - * a chance to an acquire a delegation if appropriate. - */ + } else { status = nfsd_lookup(rqstp, current_fh, - open->op_fname.data, open->op_fname.len, *resfh); + open->op_fname, open->op_fnamelen, *resfh); + if (!status) + /* NFSv4 protocol requires change attributes even though + * no change happened. + */ + fh_fill_both_attrs(current_fh); + } if (status) goto out; status = nfsd_check_obj_isreg(*resfh); if (status) goto out; - if (is_create_with_attrs(open) && open->op_acl != NULL) - do_set_nfs4_acl(rqstp, *resfh, open->op_acl, open->op_bmval); - nfsd4_set_open_owner_reply_cache(cstate, open, *resfh); accmode = NFSD_MAY_NOP; if (open->op_created || @@ -307,7 +451,6 @@ static __be32 do_open_fhandle(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open) { struct svc_fh *current_fh = &cstate->current_fh; - __be32 status; int accmode = 0; /* We don't know the target directory, and therefore can not @@ -332,9 +475,7 @@ do_open_fhandle(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, str if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEG_CUR_FH) accmode = NFSD_MAY_OWNER_OVERRIDE; - status = do_open_permission(rqstp, current_fh, open, accmode); - - return status; + return do_open_permission(rqstp, current_fh, open, accmode); } static void @@ -359,9 +500,12 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, bool reclaim = false; dprintk("NFSD: nfsd4_open filename %.*s op_openowner %p\n", - (int)open->op_fname.len, open->op_fname.data, + (int)open->op_fnamelen, open->op_fname, open->op_openowner); + open->op_filp = NULL; + open->op_rqstp = rqstp; + /* This check required by spec. */ if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL) return nfserr_inval; @@ -372,8 +516,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * Before RECLAIM_COMPLETE done, server should deny new lock */ if (nfsd4_has_session(cstate) && - !test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, - &cstate->session->se_client->cl_flags) && + !test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &cstate->clp->cl_flags) && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) return nfserr_grace; @@ -415,51 +558,46 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; switch (open->op_claim_type) { - case NFS4_OPEN_CLAIM_DELEGATE_CUR: - case NFS4_OPEN_CLAIM_NULL: - status = do_open_lookup(rqstp, cstate, open, &resfh); - if (status) - goto out; - break; - case NFS4_OPEN_CLAIM_PREVIOUS: - status = nfs4_check_open_reclaim(&open->op_clientid, - cstate, nn); - if (status) - goto out; - open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; - reclaim = true; - /* fall through */ - case NFS4_OPEN_CLAIM_FH: - case NFS4_OPEN_CLAIM_DELEG_CUR_FH: - status = do_open_fhandle(rqstp, cstate, open); - if (status) - goto out; - resfh = &cstate->current_fh; - break; - case NFS4_OPEN_CLAIM_DELEG_PREV_FH: - case NFS4_OPEN_CLAIM_DELEGATE_PREV: - dprintk("NFSD: unsupported OPEN claim type %d\n", - open->op_claim_type); - status = nfserr_notsupp; + case NFS4_OPEN_CLAIM_DELEGATE_CUR: + case NFS4_OPEN_CLAIM_NULL: + status = do_open_lookup(rqstp, cstate, open, &resfh); + if (status) goto out; - default: - dprintk("NFSD: Invalid OPEN claim type %d\n", - open->op_claim_type); - status = nfserr_inval; + break; + case NFS4_OPEN_CLAIM_PREVIOUS: + status = nfs4_check_open_reclaim(cstate->clp); + if (status) + goto out; + open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; + reclaim = true; + fallthrough; + case NFS4_OPEN_CLAIM_FH: + case NFS4_OPEN_CLAIM_DELEG_CUR_FH: + status = do_open_fhandle(rqstp, cstate, open); + if (status) goto out; + resfh = &cstate->current_fh; + break; + case NFS4_OPEN_CLAIM_DELEG_PREV_FH: + case NFS4_OPEN_CLAIM_DELEGATE_PREV: + status = nfserr_notsupp; + goto out; + default: + status = nfserr_inval; + goto out; } - /* - * nfsd4_process_open2() does the actual opening of the file. If - * successful, it (1) truncates the file if open->op_truncate was - * set, (2) sets open->op_stateid, (3) sets open->op_delegation. - */ + status = nfsd4_process_open2(rqstp, resfh, open); - WARN(status && open->op_created, - "nfsd4_process_open2 failed to open newly-created file! status=%u\n", - be32_to_cpu(status)); + if (status && open->op_created) + pr_warn("nfsd4_process_open2 failed to open newly-created file: status=%u\n", + be32_to_cpu(status)); if (reclaim && !status) nn->somebody_reclaimed = true; out: + if (open->op_filp) { + fput(open->op_filp); + open->op_filp = NULL; + } if (resfh && resfh != &cstate->current_fh) { fh_dup2(&cstate->current_fh, resfh); fh_put(resfh); @@ -508,7 +646,7 @@ nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, fh_put(&cstate->current_fh); cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen; - memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval, + memcpy(&cstate->current_fh.fh_handle.fh_raw, putfh->pf_fhval, putfh->pf_fhlen); ret = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_BYPASS_GSS); #ifdef CONFIG_NFSD_V4_2_INTER_SSC @@ -524,11 +662,9 @@ static __be32 nfsd4_putrootfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { - __be32 status; - fh_put(&cstate->current_fh); - status = exp_pseudoroot(rqstp, &cstate->current_fh); - return status; + + return exp_pseudoroot(rqstp, &cstate->current_fh); } static __be32 @@ -566,8 +702,14 @@ nfsd4_access(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_access *access = &u->access; + u32 access_full; + + access_full = NFS3_ACCESS_FULL; + if (cstate->minorversion >= 2) + access_full |= NFS4_ACCESS_XALIST | NFS4_ACCESS_XAREAD | + NFS4_ACCESS_XAWRITE; - if (access->ac_req_access & ~NFS3_ACCESS_FULL) + if (access->ac_req_access & ~access_full) return nfserr_inval; access->ac_resp_access = access->ac_req_access; @@ -581,7 +723,7 @@ static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net) BUILD_BUG_ON(2*sizeof(*verf) != sizeof(verifier->data)); - nfsd_copy_boot_verifier(verf, net_generic(net, nfsd_net_id)); + nfsd_copy_write_verifier(verf, net_generic(net, nfsd_net_id)); } static __be32 @@ -600,6 +742,10 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_create *create = &u->create; + struct nfsd_attrs attrs = { + .na_iattr = &create->cr_iattr, + .na_seclabel = &create->cr_label, + }; struct svc_fh resfh; __be32 status; dev_t rdev; @@ -615,12 +761,13 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) return status; + status = nfsd4_acl_to_attr(create->cr_type, create->cr_acl, &attrs); current->fs->umask = create->cr_umask; switch (create->cr_type) { case NF4LNK: status = nfsd_symlink(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, - create->cr_data, &resfh); + create->cr_data, &attrs, &resfh); break; case NF4BLK: @@ -631,7 +778,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out_umask; status = nfsd_create(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, - &create->cr_iattr, S_IFBLK, rdev, &resfh); + &attrs, S_IFBLK, rdev, &resfh); break; case NF4CHR: @@ -642,26 +789,26 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out_umask; status = nfsd_create(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, - &create->cr_iattr,S_IFCHR, rdev, &resfh); + &attrs, S_IFCHR, rdev, &resfh); break; case NF4SOCK: status = nfsd_create(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, - &create->cr_iattr, S_IFSOCK, 0, &resfh); + &attrs, S_IFSOCK, 0, &resfh); break; case NF4FIFO: status = nfsd_create(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, - &create->cr_iattr, S_IFIFO, 0, &resfh); + &attrs, S_IFIFO, 0, &resfh); break; case NF4DIR: create->cr_iattr.ia_valid &= ~ATTR_SIZE; status = nfsd_create(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, - &create->cr_iattr, S_IFDIR, 0, &resfh); + &attrs, S_IFDIR, 0, &resfh); break; default: @@ -671,20 +818,17 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; - if (create->cr_label.len) - nfsd4_security_inode_setsecctx(&resfh, &create->cr_label, create->cr_bmval); - - if (create->cr_acl != NULL) - do_set_nfs4_acl(rqstp, &resfh, create->cr_acl, - create->cr_bmval); - - fh_unlock(&cstate->current_fh); + if (attrs.na_labelerr) + create->cr_bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL; + if (attrs.na_aclerr) + create->cr_bmval[0] &= ~FATTR4_WORD0_ACL; set_change_info(&create->cr_cinfo, &cstate->current_fh); fh_dup2(&cstate->current_fh, &resfh); out: fh_put(&resfh); out_umask: current->fs->umask = 0; + nfsd_attrs_free(&attrs); return status; } @@ -765,12 +909,16 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __be32 status; read->rd_nf = NULL; - if (read->rd_offset >= OFFSET_MAX) - return nfserr_inval; trace_nfsd_read_start(rqstp, &cstate->current_fh, read->rd_offset, read->rd_length); + read->rd_length = min_t(u32, read->rd_length, svc_max_payload(rqstp)); + if (read->rd_offset > (u64)OFFSET_MAX) + read->rd_offset = (u64)OFFSET_MAX; + if (read->rd_offset + read->rd_length > (u64)OFFSET_MAX) + read->rd_length = (u64)OFFSET_MAX - read->rd_offset; + /* * If we do a zero copy read, then a client will see read data * that reflects the state of the file *after* performing the @@ -780,7 +928,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * the client wants us to do more in this compound: */ if (!nfsd4_last_compound_op(rqstp)) - clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); + __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); /* check stateid */ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, @@ -853,10 +1001,8 @@ nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return nfserr_grace; status = nfsd_unlink(rqstp, &cstate->current_fh, 0, remove->rm_name, remove->rm_namelen); - if (!status) { - fh_unlock(&cstate->current_fh); + if (!status) set_change_info(&remove->rm_cinfo, &cstate->current_fh); - } return status; } @@ -896,7 +1042,6 @@ nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &exp, &dentry); if (err) return err; - fh_unlock(&cstate->current_fh); if (d_really_is_negative(dentry)) { exp_put(exp); err = nfserr_noent; @@ -951,6 +1096,11 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_setattr *setattr = &u->setattr; + struct nfsd_attrs attrs = { + .na_iattr = &setattr->sa_iattr, + .na_seclabel = &setattr->sa_label, + }; + struct inode *inode; __be32 status = nfs_ok; int err; @@ -973,19 +1123,18 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; - if (setattr->sa_acl != NULL) - status = nfsd4_set_nfs4_acl(rqstp, &cstate->current_fh, - setattr->sa_acl); - if (status) - goto out; - if (setattr->sa_label.len) - status = nfsd4_set_nfs4_label(rqstp, &cstate->current_fh, - &setattr->sa_label); + inode = cstate->current_fh.fh_dentry->d_inode; + status = nfsd4_acl_to_attr(S_ISDIR(inode->i_mode) ? NF4DIR : NF4REG, + setattr->sa_acl, &attrs); + if (status) goto out; - status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr, + status = nfsd_setattr(rqstp, &cstate->current_fh, &attrs, 0, (time64_t)0); + if (!status) + status = nfserrno(attrs.na_labelerr); out: + nfsd_attrs_free(&attrs); fh_drop_write(&cstate->current_fh); return status; } @@ -1001,8 +1150,9 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, unsigned long cnt; int nvecs; - if (write->wr_offset >= OFFSET_MAX) - return nfserr_inval; + if (write->wr_offset > (u64)OFFSET_MAX || + write->wr_offset + write->wr_buflen > (u64)OFFSET_MAX) + return nfserr_fbig; cnt = write->wr_buflen; trace_nfsd_write_start(rqstp, &cstate->current_fh, @@ -1016,8 +1166,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, write->wr_how_written = write->wr_stable_how; - nvecs = svc_fill_write_vector(rqstp, write->wr_pagelist, - &write->wr_head, write->wr_buflen); + nvecs = svc_fill_write_vector(rqstp, &write->wr_payload); WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec)); status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf, @@ -1085,7 +1234,7 @@ nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; - status = nfsd4_clone_file_range(src, clone->cl_src_pos, + status = nfsd4_clone_file_range(rqstp, src, clone->cl_src_pos, dst, clone->cl_dst_pos, clone->cl_count, EX_ISSYNC(cstate->current_fh.fh_export)); @@ -1095,30 +1244,17 @@ out: return status; } -void nfs4_put_copy(struct nfsd4_copy *copy) +static void nfs4_put_copy(struct nfsd4_copy *copy) { if (!refcount_dec_and_test(©->refcount)) return; + kfree(copy->cp_src); kfree(copy); } -static bool -check_and_set_stop_copy(struct nfsd4_copy *copy) -{ - bool value; - - spin_lock(©->cp_clp->async_lock); - value = copy->stopped; - if (!copy->stopped) - copy->stopped = true; - spin_unlock(©->cp_clp->async_lock); - return value; -} - static void nfsd4_stop_copy(struct nfsd4_copy *copy) { - /* only 1 thread should stop the copy */ - if (!check_and_set_stop_copy(copy)) + if (!test_and_set_bit(NFSD4_COPY_F_STOPPED, ©->cp_flags)) kthread_stop(copy->copy_task); nfs4_put_copy(copy); } @@ -1155,7 +1291,82 @@ extern void nfs_sb_deactive(struct super_block *sb); #define NFSD42_INTERSSC_MOUNTOPS "vers=4.2,addr=%s,sec=sys" -/** +/* + * setup a work entry in the ssc delayed unmount list. + */ +static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr, + struct nfsd4_ssc_umount_item **retwork, struct vfsmount **ss_mnt) +{ + struct nfsd4_ssc_umount_item *ni = NULL; + struct nfsd4_ssc_umount_item *work = NULL; + struct nfsd4_ssc_umount_item *tmp; + DEFINE_WAIT(wait); + + *ss_mnt = NULL; + *retwork = NULL; + work = kzalloc(sizeof(*work), GFP_KERNEL); +try_again: + spin_lock(&nn->nfsd_ssc_lock); + list_for_each_entry_safe(ni, tmp, &nn->nfsd_ssc_mount_list, nsui_list) { + if (strncmp(ni->nsui_ipaddr, ipaddr, sizeof(ni->nsui_ipaddr))) + continue; + /* found a match */ + if (ni->nsui_busy) { + /* wait - and try again */ + prepare_to_wait(&nn->nfsd_ssc_waitq, &wait, + TASK_INTERRUPTIBLE); + spin_unlock(&nn->nfsd_ssc_lock); + + /* allow 20secs for mount/unmount for now - revisit */ + if (signal_pending(current) || + (schedule_timeout(20*HZ) == 0)) { + kfree(work); + return nfserr_eagain; + } + finish_wait(&nn->nfsd_ssc_waitq, &wait); + goto try_again; + } + *ss_mnt = ni->nsui_vfsmount; + refcount_inc(&ni->nsui_refcnt); + spin_unlock(&nn->nfsd_ssc_lock); + kfree(work); + + /* return vfsmount in ss_mnt */ + return 0; + } + if (work) { + strscpy(work->nsui_ipaddr, ipaddr, sizeof(work->nsui_ipaddr) - 1); + refcount_set(&work->nsui_refcnt, 2); + work->nsui_busy = true; + list_add_tail(&work->nsui_list, &nn->nfsd_ssc_mount_list); + *retwork = work; + } + spin_unlock(&nn->nfsd_ssc_lock); + return 0; +} + +static void nfsd4_ssc_update_dul_work(struct nfsd_net *nn, + struct nfsd4_ssc_umount_item *work, struct vfsmount *ss_mnt) +{ + /* set nsui_vfsmount, clear busy flag and wakeup waiters */ + spin_lock(&nn->nfsd_ssc_lock); + work->nsui_vfsmount = ss_mnt; + work->nsui_busy = false; + wake_up_all(&nn->nfsd_ssc_waitq); + spin_unlock(&nn->nfsd_ssc_lock); +} + +static void nfsd4_ssc_cancel_dul_work(struct nfsd_net *nn, + struct nfsd4_ssc_umount_item *work) +{ + spin_lock(&nn->nfsd_ssc_lock); + list_del(&work->nsui_list); + wake_up_all(&nn->nfsd_ssc_waitq); + spin_unlock(&nn->nfsd_ssc_lock); + kfree(work); +} + +/* * Support one copy source server for now. */ static __be32 @@ -1171,6 +1382,8 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp, char *ipaddr, *dev_name, *raw_data; int len, raw_len; __be32 status = nfserr_inval; + struct nfsd4_ssc_umount_item *work = NULL; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); naddr = &nss->u.nl4_addr; tmp_addrlen = rpc_uaddr2sockaddr(SVC_NET(rqstp), naddr->addr, @@ -1219,12 +1432,24 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp, goto out_free_rawdata; snprintf(dev_name, len + 5, "%s%s%s:/", startsep, ipaddr, endsep); + status = nfsd4_ssc_setup_dul(nn, ipaddr, &work, &ss_mnt); + if (status) + goto out_free_devname; + if (ss_mnt) + goto out_done; + /* Use an 'internal' mount: SB_KERNMOUNT -> MNT_INTERNAL */ ss_mnt = vfs_kern_mount(type, SB_KERNMOUNT, dev_name, raw_data); module_put(type->owner); - if (IS_ERR(ss_mnt)) + if (IS_ERR(ss_mnt)) { + status = nfserr_nodev; + if (work) + nfsd4_ssc_cancel_dul_work(nn, work); goto out_free_devname; - + } + if (work) + nfsd4_ssc_update_dul_work(nn, work, ss_mnt); +out_done: status = 0; *mount = ss_mnt; @@ -1241,14 +1466,13 @@ out_err: static void nfsd4_interssc_disconnect(struct vfsmount *ss_mnt) { - nfs_sb_deactive(ss_mnt->mnt_sb); + nfs_do_sb_deactive(ss_mnt->mnt_sb); mntput(ss_mnt); } -/** - * nfsd4_setup_inter_ssc - * +/* * Verify COPY destination stateid. + * * Connect to the source server with NFSv4.1. * Create the source struct file for nfsd_copy_range. * Called with COPY cstate: @@ -1271,14 +1495,14 @@ nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, if (status) goto out; - status = nfsd4_interssc_connect(©->cp_src, rqstp, mount); + status = nfsd4_interssc_connect(copy->cp_src, rqstp, mount); if (status) goto out; s_fh = &cstate->save_fh; copy->c_fh.size = s_fh->fh_handle.fh_size; - memcpy(copy->c_fh.data, &s_fh->fh_handle.fh_base, copy->c_fh.size); + memcpy(copy->c_fh.data, &s_fh->fh_handle.fh_raw, copy->c_fh.size); copy->stateid.seqid = cpu_to_be32(s_stid->si_generation); memcpy(copy->stateid.other, (void *)&s_stid->si_opaque, sizeof(stateid_opaque_t)); @@ -1289,13 +1513,45 @@ out: } static void -nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src, +nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct file *filp, struct nfsd_file *dst) { - nfs42_ssc_close(src->nf_file); - nfsd_file_put(src); + bool found = false; + long timeout; + struct nfsd4_ssc_umount_item *tmp; + struct nfsd4_ssc_umount_item *ni = NULL; + struct nfsd_net *nn = net_generic(dst->nf_net, nfsd_net_id); + + nfs42_ssc_close(filp); nfsd_file_put(dst); - mntput(ss_mnt); + fput(filp); + + if (!nn) { + mntput(ss_mnt); + return; + } + spin_lock(&nn->nfsd_ssc_lock); + timeout = msecs_to_jiffies(nfsd4_ssc_umount_timeout); + list_for_each_entry_safe(ni, tmp, &nn->nfsd_ssc_mount_list, nsui_list) { + if (ni->nsui_vfsmount->mnt_sb == ss_mnt->mnt_sb) { + list_del(&ni->nsui_list); + /* + * vfsmount can be shared by multiple exports, + * decrement refcnt. If the count drops to 1 it + * will be unmounted when nsui_expire expires. + */ + refcount_dec(&ni->nsui_refcnt); + ni->nsui_expire = jiffies + timeout; + list_add_tail(&ni->nsui_list, &nn->nfsd_ssc_mount_list); + found = true; + break; + } + } + spin_unlock(&nn->nfsd_ssc_lock); + if (!found) { + mntput(ss_mnt); + return; + } } #else /* CONFIG_NFSD_V4_2_INTER_SSC */ @@ -1311,7 +1567,7 @@ nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, } static void -nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src, +nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct file *filp, struct nfsd_file *dst) { } @@ -1348,14 +1604,19 @@ nfsd4_cleanup_intra_ssc(struct nfsd_file *src, struct nfsd_file *dst) static void nfsd4_cb_offload_release(struct nfsd4_callback *cb) { - struct nfsd4_copy *copy = container_of(cb, struct nfsd4_copy, cp_cb); + struct nfsd4_cb_offload *cbo = + container_of(cb, struct nfsd4_cb_offload, co_cb); - nfs4_put_copy(copy); + kfree(cbo); } static int nfsd4_cb_offload_done(struct nfsd4_callback *cb, struct rpc_task *task) { + struct nfsd4_cb_offload *cbo = + container_of(cb, struct nfsd4_cb_offload, co_cb); + + trace_nfsd_cb_offload_done(&cbo->co_res.cb_stateid, task); return 1; } @@ -1366,40 +1627,61 @@ static const struct nfsd4_callback_ops nfsd4_cb_offload_ops = { static void nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync) { - copy->cp_res.wr_stable_how = NFS_UNSTABLE; - copy->cp_synchronous = sync; + copy->cp_res.wr_stable_how = + test_bit(NFSD4_COPY_F_COMMITTED, ©->cp_flags) ? + NFS_FILE_SYNC : NFS_UNSTABLE; + nfsd4_copy_set_sync(copy, sync); gen_boot_verifier(©->cp_res.wr_verifier, copy->cp_clp->net); } -static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy) +static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy, + struct file *dst, + struct file *src) { + errseq_t since; ssize_t bytes_copied = 0; - size_t bytes_total = copy->cp_count; + u64 bytes_total = copy->cp_count; u64 src_pos = copy->cp_src_pos; u64 dst_pos = copy->cp_dst_pos; + int status; + /* See RFC 7862 p.67: */ + if (bytes_total == 0) + bytes_total = ULLONG_MAX; do { if (kthread_should_stop()) break; - bytes_copied = nfsd_copy_file_range(copy->nf_src->nf_file, - src_pos, copy->nf_dst->nf_file, dst_pos, - bytes_total); + bytes_copied = nfsd_copy_file_range(src, src_pos, dst, dst_pos, + bytes_total); if (bytes_copied <= 0) break; bytes_total -= bytes_copied; copy->cp_res.wr_bytes_written += bytes_copied; src_pos += bytes_copied; dst_pos += bytes_copied; - } while (bytes_total > 0 && !copy->cp_synchronous); + } while (bytes_total > 0 && nfsd4_copy_is_async(copy)); + /* for a non-zero asynchronous copy do a commit of data */ + if (nfsd4_copy_is_async(copy) && copy->cp_res.wr_bytes_written > 0) { + since = READ_ONCE(dst->f_wb_err); + status = vfs_fsync_range(dst, copy->cp_dst_pos, + copy->cp_res.wr_bytes_written, 0); + if (!status) + status = filemap_check_wb_err(dst->f_mapping, since); + if (!status) + set_bit(NFSD4_COPY_F_COMMITTED, ©->cp_flags); + } return bytes_copied; } -static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync) +static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, + struct file *src, struct file *dst, + bool sync) { __be32 status; ssize_t bytes; - bytes = _nfsd_copy_file_range(copy); + bytes = _nfsd_copy_file_range(copy, dst, src); + /* for async copy, we ignore the error, client can always retry * to get the error */ @@ -1409,44 +1691,35 @@ static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync) nfsd4_init_copy_res(copy, sync); status = nfs_ok; } - - if (!copy->cp_intra) /* Inter server SSC */ - nfsd4_cleanup_inter_ssc(copy->ss_mnt, copy->nf_src, - copy->nf_dst); - else - nfsd4_cleanup_intra_ssc(copy->nf_src, copy->nf_dst); - return status; } -static int dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) +static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) { dst->cp_src_pos = src->cp_src_pos; dst->cp_dst_pos = src->cp_dst_pos; dst->cp_count = src->cp_count; - dst->cp_synchronous = src->cp_synchronous; + dst->cp_flags = src->cp_flags; memcpy(&dst->cp_res, &src->cp_res, sizeof(src->cp_res)); memcpy(&dst->fh, &src->fh, sizeof(src->fh)); dst->cp_clp = src->cp_clp; dst->nf_dst = nfsd_file_get(src->nf_dst); - dst->cp_intra = src->cp_intra; - if (src->cp_intra) /* for inter, file_src doesn't exist yet */ + /* for inter, nf_src doesn't exist yet */ + if (!nfsd4_ssc_is_inter(src)) dst->nf_src = nfsd_file_get(src->nf_src); memcpy(&dst->cp_stateid, &src->cp_stateid, sizeof(src->cp_stateid)); - memcpy(&dst->cp_src, &src->cp_src, sizeof(struct nl4_server)); + memcpy(dst->cp_src, src->cp_src, sizeof(struct nl4_server)); memcpy(&dst->stateid, &src->stateid, sizeof(src->stateid)); memcpy(&dst->c_fh, &src->c_fh, sizeof(src->c_fh)); dst->ss_mnt = src->ss_mnt; - - return 0; } static void cleanup_async_copy(struct nfsd4_copy *copy) { nfs4_free_copy_state(copy); nfsd_file_put(copy->nf_dst); - if (copy->cp_intra) + if (!nfsd4_ssc_is_inter(copy)) nfsd_file_put(copy->nf_src); spin_lock(©->cp_clp->async_lock); list_del(©->copies); @@ -1454,42 +1727,64 @@ static void cleanup_async_copy(struct nfsd4_copy *copy) nfs4_put_copy(copy); } +static void nfsd4_send_cb_offload(struct nfsd4_copy *copy, __be32 nfserr) +{ + struct nfsd4_cb_offload *cbo; + + cbo = kzalloc(sizeof(*cbo), GFP_KERNEL); + if (!cbo) + return; + + memcpy(&cbo->co_res, ©->cp_res, sizeof(copy->cp_res)); + memcpy(&cbo->co_fh, ©->fh, sizeof(copy->fh)); + cbo->co_nfserr = nfserr; + + nfsd4_init_cb(&cbo->co_cb, copy->cp_clp, &nfsd4_cb_offload_ops, + NFSPROC4_CLNT_CB_OFFLOAD); + trace_nfsd_cb_offload(copy->cp_clp, &cbo->co_res.cb_stateid, + &cbo->co_fh, copy->cp_count, nfserr); + nfsd4_run_cb(&cbo->co_cb); +} + +/** + * nfsd4_do_async_copy - kthread function for background server-side COPY + * @data: arguments for COPY operation + * + * Return values: + * %0: Copy operation is done. + */ static int nfsd4_do_async_copy(void *data) { struct nfsd4_copy *copy = (struct nfsd4_copy *)data; - struct nfsd4_copy *cb_copy; + __be32 nfserr; - if (!copy->cp_intra) { /* Inter server SSC */ - copy->nf_src = kzalloc(sizeof(struct nfsd_file), GFP_KERNEL); - if (!copy->nf_src) { - copy->nfserr = nfserr_serverfault; - nfsd4_interssc_disconnect(copy->ss_mnt); - goto do_callback; - } - copy->nf_src->nf_file = nfs42_ssc_open(copy->ss_mnt, ©->c_fh, - ©->stateid); - if (IS_ERR(copy->nf_src->nf_file)) { - copy->nfserr = nfserr_offload_denied; + if (nfsd4_ssc_is_inter(copy)) { + struct file *filp; + + filp = nfs42_ssc_open(copy->ss_mnt, ©->c_fh, + ©->stateid); + if (IS_ERR(filp)) { + switch (PTR_ERR(filp)) { + case -EBADF: + nfserr = nfserr_wrong_type; + break; + default: + nfserr = nfserr_offload_denied; + } nfsd4_interssc_disconnect(copy->ss_mnt); goto do_callback; } + nfserr = nfsd4_do_copy(copy, filp, copy->nf_dst->nf_file, + false); + nfsd4_cleanup_inter_ssc(copy->ss_mnt, filp, copy->nf_dst); + } else { + nfserr = nfsd4_do_copy(copy, copy->nf_src->nf_file, + copy->nf_dst->nf_file, false); + nfsd4_cleanup_intra_ssc(copy->nf_src, copy->nf_dst); } - copy->nfserr = nfsd4_do_copy(copy, 0); do_callback: - cb_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); - if (!cb_copy) - goto out; - memcpy(&cb_copy->cp_res, ©->cp_res, sizeof(copy->cp_res)); - cb_copy->cp_clp = copy->cp_clp; - cb_copy->nfserr = copy->nfserr; - memcpy(&cb_copy->fh, ©->fh, sizeof(copy->fh)); - nfsd4_init_cb(&cb_copy->cp_cb, cb_copy->cp_clp, - &nfsd4_cb_offload_ops, NFSPROC4_CLNT_CB_OFFLOAD); - nfsd4_run_cb(&cb_copy->cp_cb); -out: - if (!copy->cp_intra) - kfree(copy->nf_src); + nfsd4_send_cb_offload(copy, nfserr); cleanup_async_copy(copy); return 0; } @@ -1502,8 +1797,8 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __be32 status; struct nfsd4_copy *async_copy = NULL; - if (!copy->cp_intra) { /* Inter server SSC */ - if (!inter_copy_offload_enable || copy->cp_synchronous) { + if (nfsd4_ssc_is_inter(copy)) { + if (!inter_copy_offload_enable || nfsd4_copy_is_sync(copy)) { status = nfserr_notsupp; goto out; } @@ -1520,21 +1815,22 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, copy->cp_clp = cstate->clp; memcpy(©->fh, &cstate->current_fh.fh_handle, sizeof(struct knfsd_fh)); - if (!copy->cp_synchronous) { + if (nfsd4_copy_is_async(copy)) { struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); status = nfserrno(-ENOMEM); async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); if (!async_copy) goto out_err; + async_copy->cp_src = kmalloc(sizeof(*async_copy->cp_src), GFP_KERNEL); + if (!async_copy->cp_src) + goto out_err; if (!nfs4_init_copy_state(nn, copy)) goto out_err; refcount_set(&async_copy->refcount, 1); - memcpy(©->cp_res.cb_stateid, ©->cp_stateid, - sizeof(copy->cp_stateid)); - status = dup_copy_fields(copy, async_copy); - if (status) - goto out_err; + memcpy(©->cp_res.cb_stateid, ©->cp_stateid.cs_stid, + sizeof(copy->cp_res.cb_stateid)); + dup_copy_fields(copy, async_copy); async_copy->copy_task = kthread_create(nfsd4_do_async_copy, async_copy, "%s", "copy thread"); if (IS_ERR(async_copy->copy_task)) @@ -1546,7 +1842,9 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, wake_up_process(async_copy->copy_task); status = nfs_ok; } else { - status = nfsd4_do_copy(copy, 1); + status = nfsd4_do_copy(copy, copy->nf_src->nf_file, + copy->nf_dst->nf_file, true); + nfsd4_cleanup_intra_ssc(copy->nf_src, copy->nf_dst); } out: return status; @@ -1554,7 +1852,7 @@ out_err: if (async_copy) cleanup_async_copy(async_copy); status = nfserrno(-ENOMEM); - if (!copy->cp_intra) + if (nfsd4_ssc_is_inter(copy)) nfsd4_interssc_disconnect(copy->ss_mnt); goto out; } @@ -1566,7 +1864,7 @@ find_async_copy(struct nfs4_client *clp, stateid_t *stateid) spin_lock(&clp->async_lock); list_for_each_entry(copy, &clp->async_copies, copies) { - if (memcmp(©->cp_stateid.stid, stateid, NFS4_STATEID_SIZE)) + if (memcmp(©->cp_stateid.cs_stid, stateid, NFS4_STATEID_SIZE)) continue; refcount_inc(©->refcount); spin_unlock(&clp->async_lock); @@ -1620,16 +1918,16 @@ nfsd4_copy_notify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, cps = nfs4_alloc_init_cpntf_state(nn, stid); if (!cps) goto out; - memcpy(&cn->cpn_cnr_stateid, &cps->cp_stateid.stid, sizeof(stateid_t)); + memcpy(&cn->cpn_cnr_stateid, &cps->cp_stateid.cs_stid, sizeof(stateid_t)); memcpy(&cps->cp_p_stateid, &stid->sc_stateid, sizeof(stateid_t)); memcpy(&cps->cp_p_clid, &clp->cl_clientid, sizeof(clientid_t)); /* For now, only return one server address in cpn_src, the * address used by the client to connect to this server. */ - cn->cpn_src.nl4_type = NL4_NETADDR; + cn->cpn_src->nl4_type = NL4_NETADDR; status = nfsd4_set_netaddr((struct sockaddr *)&rqstp->rq_daddr, - &cn->cpn_src.u.nl4_addr); + &cn->cpn_src->u.nl4_addr); WARN_ON_ONCE(status); if (status) { nfs4_put_cpntf_state(nn, cps); @@ -1880,7 +2178,7 @@ nfsd4_getdeviceinfo(struct svc_rqst *rqstp, nfserr = nfs_ok; if (gdp->gd_maxcount != 0) { nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, - rqstp, cstate->session->se_client, gdp); + rqstp, cstate->clp, gdp); } gdp->gd_notify_types &= ops->notify_types; @@ -2092,19 +2390,81 @@ out: } #endif /* CONFIG_NFSD_PNFS */ +static __be32 +nfsd4_getxattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + union nfsd4_op_u *u) +{ + struct nfsd4_getxattr *getxattr = &u->getxattr; + + return nfsd_getxattr(rqstp, &cstate->current_fh, + getxattr->getxa_name, &getxattr->getxa_buf, + &getxattr->getxa_len); +} + +static __be32 +nfsd4_setxattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + union nfsd4_op_u *u) +{ + struct nfsd4_setxattr *setxattr = &u->setxattr; + __be32 ret; + + if (opens_in_grace(SVC_NET(rqstp))) + return nfserr_grace; + + ret = nfsd_setxattr(rqstp, &cstate->current_fh, setxattr->setxa_name, + setxattr->setxa_buf, setxattr->setxa_len, + setxattr->setxa_flags); + + if (!ret) + set_change_info(&setxattr->setxa_cinfo, &cstate->current_fh); + + return ret; +} + +static __be32 +nfsd4_listxattrs(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + union nfsd4_op_u *u) +{ + /* + * Get the entire list, then copy out only the user attributes + * in the encode function. + */ + return nfsd_listxattr(rqstp, &cstate->current_fh, + &u->listxattrs.lsxa_buf, &u->listxattrs.lsxa_len); +} + +static __be32 +nfsd4_removexattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + union nfsd4_op_u *u) +{ + struct nfsd4_removexattr *removexattr = &u->removexattr; + __be32 ret; + + if (opens_in_grace(SVC_NET(rqstp))) + return nfserr_grace; + + ret = nfsd_removexattr(rqstp, &cstate->current_fh, + removexattr->rmxa_name); + + if (!ret) + set_change_info(&removexattr->rmxa_cinfo, &cstate->current_fh); + + return ret; +} + /* * NULL call. */ static __be32 nfsd4_proc_null(struct svc_rqst *rqstp) { - return nfs_ok; + return rpc_success; } static inline void nfsd4_increment_op_stats(u32 opnum) { if (opnum >= FIRST_NFS4_OP && opnum <= LAST_NFS4_OP) - nfsdstats.nfs4_opcount[opnum]++; + percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_NFS4_OP(opnum)]); } static const struct nfsd4_operation nfsd4_ops[]; @@ -2194,25 +2554,6 @@ static bool need_wrongsec_check(struct svc_rqst *rqstp) return !(nextd->op_flags & OP_HANDLES_WRONGSEC); } -static void svcxdr_init_encode(struct svc_rqst *rqstp, - struct nfsd4_compoundres *resp) -{ - struct xdr_stream *xdr = &resp->xdr; - struct xdr_buf *buf = &rqstp->rq_res; - struct kvec *head = buf->head; - - xdr->buf = buf; - xdr->iov = head; - xdr->p = head->iov_base + head->iov_len; - xdr->end = head->iov_base + PAGE_SIZE - rqstp->rq_auth_slack; - /* Tail and page_len should be zero at this point: */ - buf->len = buf->head[0].iov_len; - xdr->scratch.iov_len = 0; - xdr->page_ptr = buf->pages - 1; - buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages) - - rqstp->rq_auth_slack; -} - #ifdef CONFIG_NFSD_V4_2_INTER_SSC static void check_if_stalefh_allowed(struct nfsd4_compoundargs *args) @@ -2240,7 +2581,7 @@ check_if_stalefh_allowed(struct nfsd4_compoundargs *args) return; } putfh = (struct nfsd4_putfh *)&saved_op->u; - if (!copy->cp_intra) + if (nfsd4_ssc_is_inter(copy)) putfh->no_verify = true; } } @@ -2267,21 +2608,26 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); __be32 status; - svcxdr_init_encode(rqstp, resp); - resp->tagp = resp->xdr.p; + resp->xdr = &rqstp->rq_res_stream; + resp->statusp = resp->xdr->p; + + /* reserve space for: NFS status code */ + xdr_reserve_space(resp->xdr, XDR_UNIT); + /* reserve space for: taglen, tag, and opcnt */ - xdr_reserve_space(&resp->xdr, 8 + args->taglen); + xdr_reserve_space(resp->xdr, XDR_UNIT * 2 + args->taglen); resp->taglen = args->taglen; resp->tag = args->tag; resp->rqstp = rqstp; cstate->minorversion = args->minorversion; fh_init(current_fh, NFS4_FHSIZE); fh_init(save_fh, NFS4_FHSIZE); + /* * Don't use the deferral mechanism for NFSv4; compounds make it * too hard to avoid non-idempotency problems. */ - clear_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); + __clear_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); /* * According to RFC3010, this takes precedence over all other errors. @@ -2289,9 +2635,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) status = nfserr_minor_vers_mismatch; if (nfsd_minorversion(nn, args->minorversion, NFSD_TEST) <= 0) goto out; - status = nfserr_resource; - if (args->opcnt > NFSD_MAX_OPS_PER_COMPOUND) - goto out; status = nfs41_check_op_ordering(args); if (status) { @@ -2302,10 +2645,22 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) } check_if_stalefh_allowed(args); - trace_nfsd_compound(rqstp, args->opcnt); + rqstp->rq_lease_breaker = (void **)&cstate->clp; + + trace_nfsd_compound(rqstp, args->tag, args->taglen, args->client_opcnt); while (!status && resp->opcnt < args->opcnt) { op = &args->ops[resp->opcnt++]; + if (unlikely(resp->opcnt == NFSD_MAX_OPS_PER_COMPOUND)) { + /* If there are still more operations to process, + * stop here and report NFS4ERR_RESOURCE. */ + if (cstate->minorversion == 0 && + args->client_opcnt > resp->opcnt) { + op->status = nfserr_resource; + goto encode_op; + } + } + /* * The XDR decode routines may have pre-set op->status; * for example, if there is a miscellaneous XDR error @@ -2329,13 +2684,13 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) goto encode_op; } - fh_clear_wcc(current_fh); + fh_clear_pre_post_attrs(current_fh); /* If op is non-idempotent */ if (op->opdesc->op_flags & OP_MODIFIES_SOMETHING) { /* * Don't execute this op if we couldn't encode a - * succesful reply: + * successful reply: */ u32 plen = op->opdesc->op_rsize_bop(rqstp, op); /* @@ -2374,29 +2729,28 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) encode_op: if (op->status == nfserr_replay_me) { op->replay = &cstate->replay_owner->so_replay; - nfsd4_encode_replay(&resp->xdr, op); + nfsd4_encode_replay(resp->xdr, op); status = op->status = op->replay->rp_status; } else { nfsd4_encode_operation(resp, op); status = op->status; } - trace_nfsd_compound_status(args->opcnt, resp->opcnt, status, - nfsd4_op_name(op->opnum)); + trace_nfsd_compound_status(args->client_opcnt, resp->opcnt, + status, nfsd4_op_name(op->opnum)); nfsd4_cstate_clear_replay(cstate); nfsd4_increment_op_stats(op->opnum); } - cstate->status = status; fh_put(current_fh); fh_put(save_fh); BUG_ON(cstate->replay_owner); out: + cstate->status = status; /* Reset deferral mechanism for RPC deferrals */ - set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); - dprintk("nfsv4 compound returned %d\n", ntohl(status)); - return status; + __set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); + return rpc_success; } #define op_encode_hdr_size (2) @@ -2417,28 +2771,49 @@ out: #define op_encode_channel_attrs_maxsz (6 + 1 + 1) -static inline u32 nfsd4_only_status_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +/* + * The _rsize() helpers are invoked by the NFSv4 COMPOUND decoder, which + * is called before sunrpc sets rq_res.buflen. Thus we have to compute + * the maximum payload size here, based on transport limits and the size + * of the remaining space in the rq_pages array. + */ +static u32 nfsd4_max_payload(const struct svc_rqst *rqstp) +{ + u32 buflen; + + buflen = (rqstp->rq_page_end - rqstp->rq_next_page) * PAGE_SIZE; + buflen -= rqstp->rq_auth_slack; + buflen -= rqstp->rq_res.head[0].iov_len; + return min_t(u32, buflen, svc_max_payload(rqstp)); +} + +static u32 nfsd4_only_status_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size) * sizeof(__be32); } -static inline u32 nfsd4_status_stateid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_status_stateid_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_stateid_maxsz)* sizeof(__be32); } -static inline u32 nfsd4_access_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_access_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { /* ac_supported, ac_resp_access */ return (op_encode_hdr_size + 2)* sizeof(__be32); } -static inline u32 nfsd4_commit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_commit_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32); } -static inline u32 nfsd4_create_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_create_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_change_info_maxsz + nfs4_fattr_bitmap_maxsz) * sizeof(__be32); @@ -2449,17 +2824,17 @@ static inline u32 nfsd4_create_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op * the op prematurely if the estimate is too large. We may turn off splice * reads unnecessarily. */ -static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp, - struct nfsd4_op *op) +static u32 nfsd4_getattr_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { - u32 *bmap = op->u.getattr.ga_bmval; + const u32 *bmap = op->u.getattr.ga_bmval; u32 bmap0 = bmap[0], bmap1 = bmap[1], bmap2 = bmap[2]; u32 ret = 0; if (bmap0 & FATTR4_WORD0_ACL) - return svc_max_payload(rqstp); + return nfsd4_max_payload(rqstp); if (bmap0 & FATTR4_WORD0_FS_LOCATIONS) - return svc_max_payload(rqstp); + return nfsd4_max_payload(rqstp); if (bmap1 & FATTR4_WORD1_OWNER) { ret += IDMAP_NAMESZ + 4; @@ -2487,24 +2862,28 @@ static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp, return ret; } -static inline u32 nfsd4_getfh_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_getfh_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 1) * sizeof(__be32) + NFS4_FHSIZE; } -static inline u32 nfsd4_link_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_link_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_change_info_maxsz) * sizeof(__be32); } -static inline u32 nfsd4_lock_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_lock_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_lock_denied_maxsz) * sizeof(__be32); } -static inline u32 nfsd4_open_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_open_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_stateid_maxsz + op_encode_change_info_maxsz + 1 @@ -2512,80 +2891,99 @@ static inline u32 nfsd4_open_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) + op_encode_delegation_maxsz) * sizeof(__be32); } -static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_read_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { - u32 maxcount = 0, rlen = 0; - - maxcount = svc_max_payload(rqstp); - rlen = min(op->u.read.rd_length, maxcount); + u32 rlen = min(op->u.read.rd_length, nfsd4_max_payload(rqstp)); return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32); } -static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_read_plus_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { - u32 maxcount = 0, rlen = 0; + u32 rlen = min(op->u.read.rd_length, nfsd4_max_payload(rqstp)); + /* + * If we detect that the file changed during hole encoding, then we + * recover by encoding the remaining reply as data. This means we need + * to set aside enough room to encode two data segments. + */ + u32 seg_len = 2 * (1 + 2 + 1); + + return (op_encode_hdr_size + 2 + seg_len + XDR_QUADLEN(rlen)) * sizeof(__be32); +} - maxcount = svc_max_payload(rqstp); - rlen = min(op->u.readdir.rd_maxcount, maxcount); +static u32 nfsd4_readdir_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) +{ + u32 rlen = min(op->u.readdir.rd_maxcount, nfsd4_max_payload(rqstp)); return (op_encode_hdr_size + op_encode_verifier_maxsz + XDR_QUADLEN(rlen)) * sizeof(__be32); } -static inline u32 nfsd4_readlink_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_readlink_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 1) * sizeof(__be32) + PAGE_SIZE; } -static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_remove_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_change_info_maxsz) * sizeof(__be32); } -static inline u32 nfsd4_rename_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_rename_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + op_encode_change_info_maxsz + op_encode_change_info_maxsz) * sizeof(__be32); } -static inline u32 nfsd4_sequence_rsize(struct svc_rqst *rqstp, - struct nfsd4_op *op) +static u32 nfsd4_sequence_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) * sizeof(__be32); } -static inline u32 nfsd4_test_stateid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_test_stateid_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 1 + op->u.test_stateid.ts_num_ids) * sizeof(__be32); } -static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_setattr_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz) * sizeof(__be32); } -static inline u32 nfsd4_secinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_secinfo_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + RPC_AUTH_MAXFLAVOR * (4 + XDR_QUADLEN(GSS_OID_MAX_LEN))) * sizeof(__be32); } -static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_setclientid_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 2 + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) * sizeof(__be32); } -static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_write_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 2 + op_encode_verifier_maxsz) * sizeof(__be32); } -static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_exchange_id_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 2 + 1 + /* eir_clientid, eir_sequenceid */\ 1 + 1 + /* eir_flags, spr_how */\ @@ -2599,14 +2997,16 @@ static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_o 0 /* ignored eir_server_impl_id contents */) * sizeof(__be32); } -static inline u32 nfsd4_bind_conn_to_session_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_bind_conn_to_session_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + \ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + /* bctsr_sessid */\ 2 /* bctsr_dir, use_conn_in_rdma_mode */) * sizeof(__be32); } -static inline u32 nfsd4_create_session_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_create_session_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + \ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + /* sessionid */\ @@ -2615,7 +3015,8 @@ static inline u32 nfsd4_create_session_rsize(struct svc_rqst *rqstp, struct nfsd op_encode_channel_attrs_maxsz) * sizeof(__be32); } -static inline u32 nfsd4_copy_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_copy_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 1 /* wr_callback */ + @@ -2627,16 +3028,16 @@ static inline u32 nfsd4_copy_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) 1 /* cr_synchronous */) * sizeof(__be32); } -static inline u32 nfsd4_offload_status_rsize(struct svc_rqst *rqstp, - struct nfsd4_op *op) +static u32 nfsd4_offload_status_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 2 /* osr_count */ + 1 /* osr_complete<1> optional 0 for now */) * sizeof(__be32); } -static inline u32 nfsd4_copy_notify_rsize(struct svc_rqst *rqstp, - struct nfsd4_op *op) +static u32 nfsd4_copy_notify_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 3 /* cnr_lease_time */ + @@ -2651,12 +3052,10 @@ static inline u32 nfsd4_copy_notify_rsize(struct svc_rqst *rqstp, } #ifdef CONFIG_NFSD_PNFS -static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_getdeviceinfo_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { - u32 maxcount = 0, rlen = 0; - - maxcount = svc_max_payload(rqstp); - rlen = min(op->u.getdeviceinfo.gd_maxcount, maxcount); + u32 rlen = min(op->u.getdeviceinfo.gd_maxcount, nfsd4_max_payload(rqstp)); return (op_encode_hdr_size + 1 /* gd_layout_type*/ + @@ -2669,7 +3068,8 @@ static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp, struct nfsd4 * so we need to define an arbitrary upper bound here. */ #define MAX_LAYOUT_SIZE 128 -static inline u32 nfsd4_layoutget_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_layoutget_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 1 /* logr_return_on_close */ + @@ -2678,14 +3078,16 @@ static inline u32 nfsd4_layoutget_rsize(struct svc_rqst *rqstp, struct nfsd4_op MAX_LAYOUT_SIZE) * sizeof(__be32); } -static inline u32 nfsd4_layoutcommit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_layoutcommit_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 1 /* locr_newsize */ + 2 /* ns_size */) * sizeof(__be32); } -static inline u32 nfsd4_layoutreturn_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_layoutreturn_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 1 /* lrs_stateid */ + @@ -2694,11 +3096,42 @@ static inline u32 nfsd4_layoutreturn_rsize(struct svc_rqst *rqstp, struct nfsd4_ #endif /* CONFIG_NFSD_PNFS */ -static inline u32 nfsd4_seek_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) +static u32 nfsd4_seek_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) { return (op_encode_hdr_size + 3) * sizeof(__be32); } +static u32 nfsd4_getxattr_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) +{ + u32 rlen = min_t(u32, XATTR_SIZE_MAX, nfsd4_max_payload(rqstp)); + + return (op_encode_hdr_size + 1 + XDR_QUADLEN(rlen)) * sizeof(__be32); +} + +static u32 nfsd4_setxattr_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) +{ + return (op_encode_hdr_size + op_encode_change_info_maxsz) + * sizeof(__be32); +} +static u32 nfsd4_listxattrs_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) +{ + u32 rlen = min(op->u.listxattrs.lsxa_maxcount, nfsd4_max_payload(rqstp)); + + return (op_encode_hdr_size + 4 + XDR_QUADLEN(rlen)) * sizeof(__be32); +} + +static u32 nfsd4_removexattr_rsize(const struct svc_rqst *rqstp, + const struct nfsd4_op *op) +{ + return (op_encode_hdr_size + op_encode_change_info_maxsz) + * sizeof(__be32); +} + + static const struct nfsd4_operation nfsd4_ops[] = { [OP_ACCESS] = { .op_func = nfsd4_access, @@ -3058,6 +3491,13 @@ static const struct nfsd4_operation nfsd4_ops[] = { .op_name = "OP_COPY", .op_rsize_bop = nfsd4_copy_rsize, }, + [OP_READ_PLUS] = { + .op_func = nfsd4_read, + .op_release = nfsd4_read_release, + .op_name = "OP_READ_PLUS", + .op_rsize_bop = nfsd4_read_plus_rsize, + .op_get_currentstateid = nfsd4_get_readstateid, + }, [OP_SEEK] = { .op_func = nfsd4_seek, .op_name = "OP_SEEK", @@ -3080,6 +3520,28 @@ static const struct nfsd4_operation nfsd4_ops[] = { .op_name = "OP_COPY_NOTIFY", .op_rsize_bop = nfsd4_copy_notify_rsize, }, + [OP_GETXATTR] = { + .op_func = nfsd4_getxattr, + .op_name = "OP_GETXATTR", + .op_rsize_bop = nfsd4_getxattr_rsize, + }, + [OP_SETXATTR] = { + .op_func = nfsd4_setxattr, + .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, + .op_name = "OP_SETXATTR", + .op_rsize_bop = nfsd4_setxattr_rsize, + }, + [OP_LISTXATTRS] = { + .op_func = nfsd4_listxattrs, + .op_name = "OP_LISTXATTRS", + .op_rsize_bop = nfsd4_listxattrs_rsize, + }, + [OP_REMOVEXATTR] = { + .op_func = nfsd4_removexattr, + .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, + .op_name = "OP_REMOVEXATTR", + .op_rsize_bop = nfsd4_removexattr_rsize, + }, }; /** @@ -3096,7 +3558,7 @@ bool nfsd4_spo_must_allow(struct svc_rqst *rqstp) { struct nfsd4_compoundres *resp = rqstp->rq_resp; struct nfsd4_compoundargs *argp = rqstp->rq_argp; - struct nfsd4_op *this = &argp->ops[resp->opcnt - 1]; + struct nfsd4_op *this; struct nfsd4_compound_state *cstate = &resp->cstate; struct nfs4_op_map *allow = &cstate->clp->cl_spo_must_allow; u32 opiter; @@ -3104,7 +3566,7 @@ bool nfsd4_spo_must_allow(struct svc_rqst *rqstp) if (!cstate->minorversion) return false; - if (cstate->spo_must_allowed == true) + if (cstate->spo_must_allowed) return true; opiter = resp->opcnt; @@ -3133,7 +3595,7 @@ int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op) void warn_on_nonidempotent_op(struct nfsd4_op *op) { if (OPDESC(op)->op_flags & OP_MODIFIES_SOMETHING) { - pr_err("unable to encode reply to nonidempotent op %d (%s)\n", + pr_err("unable to encode reply to nonidempotent op %u (%s)\n", op->opnum, nfsd4_op_name(op->opnum)); WARN_ON_ONCE(1); } @@ -3146,27 +3608,29 @@ static const char *nfsd4_op_name(unsigned opnum) return "unknown_operation"; } -#define nfsd4_voidres nfsd4_voidargs -struct nfsd4_voidargs { int dummy; }; - static const struct svc_procedure nfsd_procedures4[2] = { [NFSPROC4_NULL] = { .pc_func = nfsd4_proc_null, - .pc_encode = nfs4svc_encode_voidres, - .pc_argsize = sizeof(struct nfsd4_voidargs), - .pc_ressize = sizeof(struct nfsd4_voidres), + .pc_decode = nfssvc_decode_voidarg, + .pc_encode = nfssvc_encode_voidres, + .pc_argsize = sizeof(struct nfsd_voidargs), + .pc_argzero = sizeof(struct nfsd_voidargs), + .pc_ressize = sizeof(struct nfsd_voidres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = 1, + .pc_name = "NULL", }, [NFSPROC4_COMPOUND] = { .pc_func = nfsd4_proc_compound, .pc_decode = nfs4svc_decode_compoundargs, .pc_encode = nfs4svc_encode_compoundres, .pc_argsize = sizeof(struct nfsd4_compoundargs), + .pc_argzero = offsetof(struct nfsd4_compoundargs, iops), .pc_ressize = sizeof(struct nfsd4_compoundres), .pc_release = nfsd4_release_compoundargs, .pc_cachetype = RC_NOCACHE, .pc_xdrressize = NFSD_BUFSIZE/4, + .pc_name = "COMPOUND", }, }; @@ -3181,9 +3645,3 @@ const struct svc_version nfsd_version4 = { .vs_rpcb_optnl = true, .vs_need_cong_ctrl = true, }; - -/* - * Local variables: - * c-basic-offset: 8 - * End: - */ diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index a8fb18609146..78b8cd9651d5 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -127,16 +127,8 @@ nfs4_make_rec_clidname(char *dname, const struct xdr_netobj *clname) goto out; } - { - SHASH_DESC_ON_STACK(desc, tfm); - - desc->tfm = tfm; - - status = crypto_shash_digest(desc, clname->data, clname->len, - cksum.data); - shash_desc_zero(desc); - } - + status = crypto_shash_tfm_digest(tfm, clname->data, clname->len, + cksum.data); if (status) goto out; @@ -241,7 +233,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) * as well be forgiving and just succeed silently. */ goto out_put; - status = vfs_mkdir(d_inode(dir), dentry, S_IRWXU); + status = vfs_mkdir(&init_user_ns, d_inode(dir), dentry, S_IRWXU); out_put: dput(dentry); out_unlock: @@ -274,7 +266,7 @@ struct nfs4_dir_ctx { struct list_head names; }; -static int +static bool nfsd4_build_namelist(struct dir_context *__ctx, const char *name, int namlen, loff_t offset, u64 ino, unsigned int d_type) { @@ -283,14 +275,14 @@ nfsd4_build_namelist(struct dir_context *__ctx, const char *name, int namlen, struct name_list *entry; if (namlen != HEXDIR_LEN - 1) - return 0; + return true; entry = kmalloc(sizeof(struct name_list), GFP_KERNEL); if (entry == NULL) - return -ENOMEM; + return false; memcpy(entry->name, name, HEXDIR_LEN - 1); entry->name[HEXDIR_LEN - 1] = '\0'; list_add(&entry->list, &ctx->names); - return 0; + return true; } static int @@ -361,7 +353,7 @@ nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn) status = -ENOENT; if (d_really_is_negative(dentry)) goto out; - status = vfs_rmdir(d_inode(dir), dentry); + status = vfs_rmdir(&init_user_ns, d_inode(dir), dentry); out: dput(dentry); out_unlock: @@ -451,7 +443,7 @@ purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn) if (nfs4_has_reclaimed_state(name, nn)) goto out_free; - status = vfs_rmdir(d_inode(parent), child); + status = vfs_rmdir(&init_user_ns, d_inode(parent), child); if (status) printk("failed to remove client recovery directory %pd\n", child); @@ -634,7 +626,7 @@ nfsd4_legacy_tracking_init(struct net *net) status = nfsd4_load_reboot_recovery_data(net); if (status) goto err; - printk("NFSD: Using legacy client tracking operations.\n"); + pr_info("NFSD: Using legacy client tracking operations.\n"); return 0; err: @@ -755,13 +747,11 @@ struct cld_upcall { }; static int -__cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg) +__cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg, struct nfsd_net *nn) { int ret; struct rpc_pipe_msg msg; struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, cu_u); - struct nfsd_net *nn = net_generic(pipe->dentry->d_sb->s_fs_info, - nfsd_net_id); memset(&msg, 0, sizeof(msg)); msg.data = cmsg; @@ -781,7 +771,7 @@ out: } static int -cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg) +cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg, struct nfsd_net *nn) { int ret; @@ -790,7 +780,7 @@ cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg) * upcalls queued. */ do { - ret = __cld_pipe_upcall(pipe, cmsg); + ret = __cld_pipe_upcall(pipe, cmsg, nn); } while (ret == -EAGAIN); return ret; @@ -817,16 +807,18 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg, if (get_user(namelen, &ci->cc_name.cn_len)) return -EFAULT; name.data = memdup_user(&ci->cc_name.cn_id, namelen); - if (IS_ERR_OR_NULL(name.data)) - return -EFAULT; + if (IS_ERR(name.data)) + return PTR_ERR(name.data); name.len = namelen; get_user(princhashlen, &ci->cc_princhash.cp_len); if (princhashlen > 0) { princhash.data = memdup_user( &ci->cc_princhash.cp_data, princhashlen); - if (IS_ERR_OR_NULL(princhash.data)) - return -EFAULT; + if (IS_ERR(princhash.data)) { + kfree(name.data); + return PTR_ERR(princhash.data); + } princhash.len = princhashlen; } else princhash.len = 0; @@ -837,8 +829,8 @@ __cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg, if (get_user(namelen, &cnm->cn_len)) return -EFAULT; name.data = memdup_user(&cnm->cn_id, namelen); - if (IS_ERR_OR_NULL(name.data)) - return -EFAULT; + if (IS_ERR(name.data)) + return PTR_ERR(name.data); name.len = namelen; } if (name.len > 5 && memcmp(name.data, "hash:", 5) == 0) { @@ -1038,7 +1030,7 @@ nfsd4_init_cld_pipe(struct net *net) status = __nfsd4_init_cld_pipe(net); if (!status) - printk("NFSD: Using old nfsdcld client tracking operations.\n"); + pr_info("NFSD: Using old nfsdcld client tracking operations.\n"); return status; } @@ -1123,7 +1115,7 @@ nfsd4_cld_create(struct nfs4_client *clp) memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data, clp->cl_name.len); - ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg); + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn); if (!ret) { ret = cup->cu_u.cu_msg.cm_status; set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); @@ -1148,7 +1140,6 @@ nfsd4_cld_create_v2(struct nfs4_client *clp) struct crypto_shash *tfm = cn->cn_tfm; struct xdr_netobj cksum; char *principal = NULL; - SHASH_DESC_ON_STACK(desc, tfm); /* Don't upcall if it's already stored */ if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) @@ -1170,16 +1161,14 @@ nfsd4_cld_create_v2(struct nfs4_client *clp) else if (clp->cl_cred.cr_principal) principal = clp->cl_cred.cr_principal; if (principal) { - desc->tfm = tfm; cksum.len = crypto_shash_digestsize(tfm); cksum.data = kmalloc(cksum.len, GFP_KERNEL); if (cksum.data == NULL) { ret = -ENOMEM; goto out; } - ret = crypto_shash_digest(desc, principal, strlen(principal), - cksum.data); - shash_desc_zero(desc); + ret = crypto_shash_tfm_digest(tfm, principal, strlen(principal), + cksum.data); if (ret) { kfree(cksum.data); goto out; @@ -1191,7 +1180,7 @@ nfsd4_cld_create_v2(struct nfs4_client *clp) } else cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = 0; - ret = cld_pipe_upcall(cn->cn_pipe, cmsg); + ret = cld_pipe_upcall(cn->cn_pipe, cmsg, nn); if (!ret) { ret = cmsg->cm_status; set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); @@ -1229,7 +1218,7 @@ nfsd4_cld_remove(struct nfs4_client *clp) memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data, clp->cl_name.len); - ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg); + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn); if (!ret) { ret = cup->cu_u.cu_msg.cm_status; clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); @@ -1272,7 +1261,7 @@ nfsd4_cld_check_v0(struct nfs4_client *clp) memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data, clp->cl_name.len); - ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg); + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn); if (!ret) { ret = cup->cu_u.cu_msg.cm_status; set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); @@ -1343,7 +1332,6 @@ nfsd4_cld_check_v2(struct nfs4_client *clp) struct crypto_shash *tfm = cn->cn_tfm; struct xdr_netobj cksum; char *principal = NULL; - SHASH_DESC_ON_STACK(desc, tfm); /* did we already find that this client is stable? */ if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) @@ -1381,14 +1369,12 @@ found: principal = clp->cl_cred.cr_principal; if (principal == NULL) return -ENOENT; - desc->tfm = tfm; cksum.len = crypto_shash_digestsize(tfm); cksum.data = kmalloc(cksum.len, GFP_KERNEL); if (cksum.data == NULL) return -ENOENT; - status = crypto_shash_digest(desc, principal, strlen(principal), - cksum.data); - shash_desc_zero(desc); + status = crypto_shash_tfm_digest(tfm, principal, + strlen(principal), cksum.data); if (status) { kfree(cksum.data); return -ENOENT; @@ -1418,7 +1404,7 @@ nfsd4_cld_grace_start(struct nfsd_net *nn) } cup->cu_u.cu_msg.cm_cmd = Cld_GraceStart; - ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg); + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn); if (!ret) ret = cup->cu_u.cu_msg.cm_status; @@ -1446,7 +1432,7 @@ nfsd4_cld_grace_done_v0(struct nfsd_net *nn) cup->cu_u.cu_msg.cm_cmd = Cld_GraceDone; cup->cu_u.cu_msg.cm_u.cm_gracetime = nn->boot_time; - ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg); + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn); if (!ret) ret = cup->cu_u.cu_msg.cm_status; @@ -1474,7 +1460,7 @@ nfsd4_cld_grace_done(struct nfsd_net *nn) } cup->cu_u.cu_msg.cm_cmd = Cld_GraceDone; - ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg); + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn); if (!ret) ret = cup->cu_u.cu_msg.cm_status; @@ -1538,7 +1524,7 @@ nfsd4_cld_get_version(struct nfsd_net *nn) goto out_err; } cup->cu_u.cu_msg.cm_cmd = Cld_GetVersion; - ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg); + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg, nn); if (!ret) { ret = cup->cu_u.cu_msg.cm_status; if (ret) @@ -1621,7 +1607,7 @@ nfsd4_cld_tracking_init(struct net *net) nfs4_release_reclaim(nn); goto err_remove; } else - printk("NFSD: Using nfsdcld client tracking operations.\n"); + pr_info("NFSD: Using nfsdcld client tracking operations.\n"); return 0; err_remove: @@ -1880,7 +1866,7 @@ nfsd4_umh_cltrack_init(struct net *net) ret = nfsd4_umh_cltrack_upcall("init", NULL, grace_start, NULL); kfree(grace_start); if (!ret) - printk("NFSD: Using UMH upcall client tracking operations.\n"); + pr_info("NFSD: Using UMH upcall client tracking operations.\n"); return ret; } @@ -2172,6 +2158,7 @@ static struct notifier_block nfsd4_cld_block = { int register_cld_notifier(void) { + WARN_ON(!nfsd_net_id); return rpc_pipefs_notifier_register(&nfsd4_cld_block); } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 65cfe9ab47be..836bd825ca4a 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -43,6 +43,8 @@ #include <linux/sunrpc/addr.h> #include <linux/jhash.h> #include <linux/string_helpers.h> +#include <linux/fsnotify.h> +#include <linux/nfs_ssc.h> #include "xdr4.h" #include "xdr4cb.h" #include "vfs.h" @@ -51,6 +53,7 @@ #include "netns.h" #include "pnfs.h" #include "filecache.h" +#include "trace.h" #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -122,6 +125,23 @@ static void free_session(struct nfsd4_session *); static const struct nfsd4_callback_ops nfsd4_cb_recall_ops; static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops; +static struct workqueue_struct *laundry_wq; + +int nfsd4_create_laundry_wq(void) +{ + int rc = 0; + + laundry_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "nfsd4"); + if (laundry_wq == NULL) + rc = -ENOMEM; + return rc; +} + +void nfsd4_destroy_laundry_wq(void) +{ + destroy_workqueue(laundry_wq); +} + static bool is_session_dead(struct nfsd4_session *ses) { return ses->se_flags & NFS4_SESSION_DEAD; @@ -140,6 +160,13 @@ static bool is_client_expired(struct nfs4_client *clp) return clp->cl_time == 0; } +static void nfsd4_dec_courtesy_client_count(struct nfsd_net *nn, + struct nfs4_client *clp) +{ + if (clp->cl_state != NFSD4_ACTIVE) + atomic_add_unless(&nn->nfsd_courtesy_clients, -1, 0); +} + static __be32 get_client_locked(struct nfs4_client *clp) { struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); @@ -149,6 +176,8 @@ static __be32 get_client_locked(struct nfs4_client *clp) if (is_client_expired(clp)) return nfserr_expired; atomic_inc(&clp->cl_rpc_users); + nfsd4_dec_courtesy_client_count(nn, clp); + clp->cl_state = NFSD4_ACTIVE; return nfs_ok; } @@ -167,11 +196,10 @@ renew_client_locked(struct nfs4_client *clp) return; } - dprintk("renewing client (clientid %08x/%08x)\n", - clp->cl_clientid.cl_boot, - clp->cl_clientid.cl_id); list_move_tail(&clp->cl_lru, &nn->client_lru); clp->cl_time = ktime_get_boottime_seconds(); + nfsd4_dec_courtesy_client_count(nn, clp); + clp->cl_state = NFSD4_ACTIVE; } static void put_client_renew_locked(struct nfs4_client *clp) @@ -246,6 +274,7 @@ find_blocked_lock(struct nfs4_lockowner *lo, struct knfsd_fh *fh, list_for_each_entry(cur, &lo->lo_blocked, nbl_list) { if (fh_match(fh, &cur->nbl_fh)) { list_del_init(&cur->nbl_list); + WARN_ON(list_empty(&cur->nbl_lru)); list_del_init(&cur->nbl_lru); found = cur; break; @@ -267,8 +296,11 @@ find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh, if (!nbl) { nbl= kmalloc(sizeof(*nbl), GFP_KERNEL); if (nbl) { + INIT_LIST_HEAD(&nbl->nbl_list); + INIT_LIST_HEAD(&nbl->nbl_lru); fh_copy_shallow(&nbl->nbl_fh, fh); locks_init_lock(&nbl->nbl_lock); + kref_init(&nbl->nbl_kref); nfsd4_init_cb(&nbl->nbl_cb, lo->lo_owner.so_client, &nfsd4_cb_notify_lock_ops, NFSPROC4_CLNT_CB_NOTIFY_LOCK); @@ -278,11 +310,20 @@ find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh, } static void +free_nbl(struct kref *kref) +{ + struct nfsd4_blocked_lock *nbl; + + nbl = container_of(kref, struct nfsd4_blocked_lock, nbl_kref); + kfree(nbl); +} + +static void free_blocked_lock(struct nfsd4_blocked_lock *nbl) { locks_delete_block(&nbl->nbl_lock); locks_release_private(&nbl->nbl_lock); - kfree(nbl); + kref_put(&nbl->nbl_kref, free_nbl); } static void @@ -300,6 +341,7 @@ remove_blocked_locks(struct nfs4_lockowner *lo) struct nfsd4_blocked_lock, nbl_list); list_del_init(&nbl->nbl_list); + WARN_ON(list_empty(&nbl->nbl_lru)); list_move(&nbl->nbl_lru, &reaplist); } spin_unlock(&nn->blocked_locks_lock); @@ -324,6 +366,8 @@ nfsd4_cb_notify_lock_prepare(struct nfsd4_callback *cb) static int nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task) { + trace_nfsd_cb_notify_lock_done(&zero_stateid, task); + /* * Since this is just an optimization, we don't try very hard if it * turns out not to succeed. We'll requeue it on NFS4ERR_DELAY, and @@ -353,6 +397,130 @@ static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = { .release = nfsd4_cb_notify_lock_release, }; +/* + * We store the NONE, READ, WRITE, and BOTH bits separately in the + * st_{access,deny}_bmap field of the stateid, in order to track not + * only what share bits are currently in force, but also what + * combinations of share bits previous opens have used. This allows us + * to enforce the recommendation in + * https://datatracker.ietf.org/doc/html/rfc7530#section-16.19.4 that + * the server return an error if the client attempt to downgrade to a + * combination of share bits not explicable by closing some of its + * previous opens. + * + * This enforcement is arguably incomplete, since we don't keep + * track of access/deny bit combinations; so, e.g., we allow: + * + * OPEN allow read, deny write + * OPEN allow both, deny none + * DOWNGRADE allow read, deny none + * + * which we should reject. + * + * But you could also argue that our current code is already overkill, + * since it only exists to return NFS4ERR_INVAL on incorrect client + * behavior. + */ +static unsigned int +bmap_to_share_mode(unsigned long bmap) +{ + int i; + unsigned int access = 0; + + for (i = 1; i < 4; i++) { + if (test_bit(i, &bmap)) + access |= i; + } + return access; +} + +/* set share access for a given stateid */ +static inline void +set_access(u32 access, struct nfs4_ol_stateid *stp) +{ + unsigned char mask = 1 << access; + + WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH); + stp->st_access_bmap |= mask; +} + +/* clear share access for a given stateid */ +static inline void +clear_access(u32 access, struct nfs4_ol_stateid *stp) +{ + unsigned char mask = 1 << access; + + WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH); + stp->st_access_bmap &= ~mask; +} + +/* test whether a given stateid has access */ +static inline bool +test_access(u32 access, struct nfs4_ol_stateid *stp) +{ + unsigned char mask = 1 << access; + + return (bool)(stp->st_access_bmap & mask); +} + +/* set share deny for a given stateid */ +static inline void +set_deny(u32 deny, struct nfs4_ol_stateid *stp) +{ + unsigned char mask = 1 << deny; + + WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH); + stp->st_deny_bmap |= mask; +} + +/* clear share deny for a given stateid */ +static inline void +clear_deny(u32 deny, struct nfs4_ol_stateid *stp) +{ + unsigned char mask = 1 << deny; + + WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH); + stp->st_deny_bmap &= ~mask; +} + +/* test whether a given stateid is denying specific access */ +static inline bool +test_deny(u32 deny, struct nfs4_ol_stateid *stp) +{ + unsigned char mask = 1 << deny; + + return (bool)(stp->st_deny_bmap & mask); +} + +static int nfs4_access_to_omode(u32 access) +{ + switch (access & NFS4_SHARE_ACCESS_BOTH) { + case NFS4_SHARE_ACCESS_READ: + return O_RDONLY; + case NFS4_SHARE_ACCESS_WRITE: + return O_WRONLY; + case NFS4_SHARE_ACCESS_BOTH: + return O_RDWR; + } + WARN_ON_ONCE(1); + return O_RDONLY; +} + +static inline int +access_permit_read(struct nfs4_ol_stateid *stp) +{ + return test_access(NFS4_SHARE_ACCESS_READ, stp) || + test_access(NFS4_SHARE_ACCESS_BOTH, stp) || + test_access(NFS4_SHARE_ACCESS_WRITE, stp); +} + +static inline int +access_permit_write(struct nfs4_ol_stateid *stp) +{ + return test_access(NFS4_SHARE_ACCESS_WRITE, stp) || + test_access(NFS4_SHARE_ACCESS_BOTH, stp); +} + static inline struct nfs4_stateowner * nfs4_get_stateowner(struct nfs4_stateowner *sop) { @@ -494,6 +662,8 @@ find_any_file(struct nfs4_file *f) { struct nfsd_file *ret; + if (!f) + return NULL; spin_lock(&f->fi_lock); ret = __nfs4_get_fd(f, O_RDWR); if (!ret) { @@ -505,6 +675,17 @@ find_any_file(struct nfs4_file *f) return ret; } +static struct nfsd_file *find_deleg_file(struct nfs4_file *f) +{ + struct nfsd_file *ret = NULL; + + spin_lock(&f->fi_lock); + if (f->fi_deleg_file) + ret = nfsd_file_get(f->fi_deleg_file); + spin_unlock(&f->fi_lock); + return ret; +} + static atomic_long_t num_delegations; unsigned long max_delegations; @@ -529,18 +710,67 @@ static unsigned int ownerstr_hashval(struct xdr_netobj *ownername) #define FILE_HASH_BITS 8 #define FILE_HASH_SIZE (1 << FILE_HASH_BITS) -static unsigned int nfsd_fh_hashval(struct knfsd_fh *fh) +static unsigned int file_hashval(struct svc_fh *fh) { - return jhash2(fh->fh_base.fh_pad, XDR_QUADLEN(fh->fh_size), 0); -} + struct inode *inode = d_inode(fh->fh_dentry); -static unsigned int file_hashval(struct knfsd_fh *fh) -{ - return nfsd_fh_hashval(fh) & (FILE_HASH_SIZE - 1); + /* XXX: why not (here & in file cache) use inode? */ + return (unsigned int)hash_long(inode->i_ino, FILE_HASH_BITS); } static struct hlist_head file_hashtbl[FILE_HASH_SIZE]; +/* + * Check if courtesy clients have conflicting access and resolve it if possible + * + * access: is op_share_access if share_access is true. + * Check if access mode, op_share_access, would conflict with + * the current deny mode of the file 'fp'. + * access: is op_share_deny if share_access is false. + * Check if the deny mode, op_share_deny, would conflict with + * current access of the file 'fp'. + * stp: skip checking this entry. + * new_stp: normal open, not open upgrade. + * + * Function returns: + * false - access/deny mode conflict with normal client. + * true - no conflict or conflict with courtesy client(s) is resolved. + */ +static bool +nfs4_resolve_deny_conflicts_locked(struct nfs4_file *fp, bool new_stp, + struct nfs4_ol_stateid *stp, u32 access, bool share_access) +{ + struct nfs4_ol_stateid *st; + bool resolvable = true; + unsigned char bmap; + struct nfsd_net *nn; + struct nfs4_client *clp; + + lockdep_assert_held(&fp->fi_lock); + list_for_each_entry(st, &fp->fi_stateids, st_perfile) { + /* ignore lock stateid */ + if (st->st_openstp) + continue; + if (st == stp && new_stp) + continue; + /* check file access against deny mode or vice versa */ + bmap = share_access ? st->st_deny_bmap : st->st_access_bmap; + if (!(access & bmap_to_share_mode(bmap))) + continue; + clp = st->st_stid.sc_client; + if (try_to_expire_client(clp)) + continue; + resolvable = false; + break; + } + if (resolvable) { + clp = stp->st_stid.sc_client; + nn = net_generic(clp->net, nfsd_net_id); + mod_delayed_work(laundry_wq, &nn->laundromat_work, 0); + } + return resolvable; +} + static void __nfs4_file_get_access(struct nfs4_file *fp, u32 access) { @@ -601,9 +831,9 @@ static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag) swap(f2, fp->fi_fds[O_RDWR]); spin_unlock(&fp->fi_lock); if (f1) - nfsd_file_put(f1); + nfsd_file_close(f1); if (f2) - nfsd_file_put(f2); + nfsd_file_close(f2); } } @@ -744,18 +974,19 @@ out_free: * Create a unique stateid_t to represent each COPY. */ static int nfs4_init_cp_state(struct nfsd_net *nn, copy_stateid_t *stid, - unsigned char sc_type) + unsigned char cs_type) { int new_id; - stid->stid.si_opaque.so_clid.cl_boot = (u32)nn->boot_time; - stid->stid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id; - stid->sc_type = sc_type; + stid->cs_stid.si_opaque.so_clid.cl_boot = (u32)nn->boot_time; + stid->cs_stid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id; + stid->cs_type = cs_type; idr_preload(GFP_KERNEL); spin_lock(&nn->s2s_cp_lock); new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, stid, 0, 0, GFP_NOWAIT); - stid->stid.si_opaque.so_id = new_id; + stid->cs_stid.si_opaque.so_id = new_id; + stid->cs_stid.si_generation = 1; spin_unlock(&nn->s2s_cp_lock); idr_preload_end(); if (new_id < 0) @@ -777,7 +1008,7 @@ struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn, if (!cps) return NULL; cps->cpntf_time = ktime_get_boottime_seconds(); - refcount_set(&cps->cp_stateid.sc_count, 1); + refcount_set(&cps->cp_stateid.cs_count, 1); if (!nfs4_init_cp_state(nn, &cps->cp_stateid, NFS4_COPYNOTIFY_STID)) goto out_free; spin_lock(&nn->s2s_cp_lock); @@ -793,11 +1024,11 @@ void nfs4_free_copy_state(struct nfsd4_copy *copy) { struct nfsd_net *nn; - WARN_ON_ONCE(copy->cp_stateid.sc_type != NFS4_COPY_STID); + WARN_ON_ONCE(copy->cp_stateid.cs_type != NFS4_COPY_STID); nn = net_generic(copy->cp_clp->net, nfsd_net_id); spin_lock(&nn->s2s_cp_lock); idr_remove(&nn->s2s_cp_stateids, - copy->cp_stateid.stid.si_opaque.so_id); + copy->cp_stateid.cs_stid.si_opaque.so_id); spin_unlock(&nn->s2s_cp_lock); } @@ -829,6 +1060,12 @@ static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp) static void nfs4_free_deleg(struct nfs4_stid *stid) { + struct nfs4_delegation *dp = delegstateid(stid); + + WARN_ON_ONCE(!list_empty(&stid->sc_cp_list)); + WARN_ON_ONCE(!list_empty(&dp->dl_perfile)); + WARN_ON_ONCE(!list_empty(&dp->dl_perclnt)); + WARN_ON_ONCE(!list_empty(&dp->dl_recall_lru)); kmem_cache_free(deleg_slab, stid); atomic_long_dec(&num_delegations); } @@ -878,7 +1115,7 @@ static int delegation_blocked(struct knfsd_fh *fh) } spin_unlock(&blocked_delegations_lock); } - hash = jhash(&fh->fh_base, fh->fh_size, 0); + hash = jhash(&fh->fh_raw, fh->fh_size, 0); if (test_bit(hash&255, bd->set[0]) && test_bit((hash>>8)&255, bd->set[0]) && test_bit((hash>>16)&255, bd->set[0])) @@ -897,7 +1134,7 @@ static void block_delegations(struct knfsd_fh *fh) u32 hash; struct bloom_pair *bd = &blocked_delegations; - hash = jhash(&fh->fh_base, fh->fh_size, 0); + hash = jhash(&fh->fh_raw, fh->fh_size, 0); spin_lock(&blocked_delegations_lock); __set_bit(hash&255, bd->set[bd->new]); @@ -911,7 +1148,6 @@ static void block_delegations(struct knfsd_fh *fh) static struct nfs4_delegation * alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp, - struct svc_fh *current_fh, struct nfs4_clnt_odstate *odstate) { struct nfs4_delegation *dp; @@ -921,7 +1157,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp, n = atomic_long_inc_return(&num_delegations); if (n < 0 || n > max_delegations) goto out_dec; - if (delegation_blocked(¤t_fh->fh_handle)) + if (delegation_blocked(&fp->fi_fhandle)) goto out_dec; dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab, nfs4_free_deleg)); if (dp == NULL) @@ -940,6 +1176,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp, get_clnt_odstate(odstate); dp->dl_type = NFS4_OPEN_DELEGATE_READ; dp->dl_retries = 1; + dp->dl_recalled = false; nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client, &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL); get_nfs4_file(fp); @@ -1075,6 +1312,11 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) return 0; } +static bool delegation_hashed(struct nfs4_delegation *dp) +{ + return !(list_empty(&dp->dl_perfile)); +} + static bool unhash_delegation_locked(struct nfs4_delegation *dp) { @@ -1082,7 +1324,7 @@ unhash_delegation_locked(struct nfs4_delegation *dp) lockdep_assert_held(&state_lock); - if (list_empty(&dp->dl_perfile)) + if (!delegation_hashed(dp)) return false; dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID; @@ -1138,108 +1380,6 @@ static unsigned int clientstr_hashval(struct xdr_netobj name) } /* - * We store the NONE, READ, WRITE, and BOTH bits separately in the - * st_{access,deny}_bmap field of the stateid, in order to track not - * only what share bits are currently in force, but also what - * combinations of share bits previous opens have used. This allows us - * to enforce the recommendation of rfc 3530 14.2.19 that the server - * return an error if the client attempt to downgrade to a combination - * of share bits not explicable by closing some of its previous opens. - * - * XXX: This enforcement is actually incomplete, since we don't keep - * track of access/deny bit combinations; so, e.g., we allow: - * - * OPEN allow read, deny write - * OPEN allow both, deny none - * DOWNGRADE allow read, deny none - * - * which we should reject. - */ -static unsigned int -bmap_to_share_mode(unsigned long bmap) { - int i; - unsigned int access = 0; - - for (i = 1; i < 4; i++) { - if (test_bit(i, &bmap)) - access |= i; - } - return access; -} - -/* set share access for a given stateid */ -static inline void -set_access(u32 access, struct nfs4_ol_stateid *stp) -{ - unsigned char mask = 1 << access; - - WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH); - stp->st_access_bmap |= mask; -} - -/* clear share access for a given stateid */ -static inline void -clear_access(u32 access, struct nfs4_ol_stateid *stp) -{ - unsigned char mask = 1 << access; - - WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH); - stp->st_access_bmap &= ~mask; -} - -/* test whether a given stateid has access */ -static inline bool -test_access(u32 access, struct nfs4_ol_stateid *stp) -{ - unsigned char mask = 1 << access; - - return (bool)(stp->st_access_bmap & mask); -} - -/* set share deny for a given stateid */ -static inline void -set_deny(u32 deny, struct nfs4_ol_stateid *stp) -{ - unsigned char mask = 1 << deny; - - WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH); - stp->st_deny_bmap |= mask; -} - -/* clear share deny for a given stateid */ -static inline void -clear_deny(u32 deny, struct nfs4_ol_stateid *stp) -{ - unsigned char mask = 1 << deny; - - WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH); - stp->st_deny_bmap &= ~mask; -} - -/* test whether a given stateid is denying specific access */ -static inline bool -test_deny(u32 deny, struct nfs4_ol_stateid *stp) -{ - unsigned char mask = 1 << deny; - - return (bool)(stp->st_deny_bmap & mask); -} - -static int nfs4_access_to_omode(u32 access) -{ - switch (access & NFS4_SHARE_ACCESS_BOTH) { - case NFS4_SHARE_ACCESS_READ: - return O_RDONLY; - case NFS4_SHARE_ACCESS_WRITE: - return O_WRONLY; - case NFS4_SHARE_ACCESS_BOTH: - return O_RDWR; - } - WARN_ON_ONCE(1); - return O_RDONLY; -} - -/* * A stateid that had a deny mode associated with it is being released * or downgraded. Recalculate the deny mode on the file. */ @@ -1309,6 +1449,12 @@ static void nfs4_put_stateowner(struct nfs4_stateowner *sop) nfs4_free_stateowner(sop); } +static bool +nfs4_ol_stateid_unhashed(const struct nfs4_ol_stateid *stp) +{ + return list_empty(&stp->st_perfile); +} + static bool unhash_ol_stateid(struct nfs4_ol_stateid *stp) { struct nfs4_file *fp = stp->st_stid.sc_file; @@ -1333,6 +1479,7 @@ static void nfs4_free_ol_stateid(struct nfs4_stid *stid) release_all_access(stp); if (stp->st_stateowner) nfs4_put_stateowner(stp->st_stateowner); + WARN_ON(!list_empty(&stid->sc_cp_list)); kmem_cache_free(stateid_slab, stid); } @@ -1379,9 +1526,11 @@ static bool unhash_lock_stateid(struct nfs4_ol_stateid *stp) { lockdep_assert_held(&stp->st_stid.sc_client->cl_lock); + if (!unhash_ol_stateid(stp)) + return false; list_del_init(&stp->st_locks); nfs4_unhash_stid(&stp->st_stid); - return unhash_ol_stateid(stp); + return true; } static void release_lock_stateid(struct nfs4_ol_stateid *stp) @@ -1446,13 +1595,12 @@ static void release_open_stateid_locks(struct nfs4_ol_stateid *open_stp, static bool unhash_open_stateid(struct nfs4_ol_stateid *stp, struct list_head *reaplist) { - bool unhashed; - lockdep_assert_held(&stp->st_stid.sc_client->cl_lock); - unhashed = unhash_ol_stateid(stp); + if (!unhash_ol_stateid(stp)) + return false; release_open_stateid_locks(stp, reaplist); - return unhashed; + return true; } static void release_open_stateid(struct nfs4_ol_stateid *stp) @@ -1709,6 +1857,8 @@ static void nfsd4_conn_lost(struct svc_xpt_user *u) struct nfsd4_conn *c = container_of(u, struct nfsd4_conn, cn_xpt_user); struct nfs4_client *clp = c->cn_session->se_client; + trace_nfsd_cb_lost(clp); + spin_lock(&clp->cl_lock); if (!list_empty(&c->cn_persession)) { list_del(&c->cn_persession); @@ -1911,8 +2061,7 @@ STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn) */ if (clid->cl_boot == (u32)nn->boot_time) return 0; - dprintk("NFSD stale clientid (%08x/%08x) boot_time %08llx\n", - clid->cl_boot, clid->cl_id, nn->boot_time); + trace_nfsd_clid_stale(clid); return 1; } @@ -1921,11 +2070,16 @@ STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn) * This type of memory management is somewhat inefficient, but we use it * anyway since SETCLIENTID is not a common operation. */ -static struct nfs4_client *alloc_client(struct xdr_netobj name) +static struct nfs4_client *alloc_client(struct xdr_netobj name, + struct nfsd_net *nn) { struct nfs4_client *clp; int i; + if (atomic_read(&nn->nfs4_client_count) >= nn->nfs4_max_clients) { + mod_delayed_work(laundry_wq, &nn->laundromat_work, 0); + return NULL; + } clp = kmem_cache_zalloc(client_slab, GFP_KERNEL); if (clp == NULL) return NULL; @@ -1943,6 +2097,9 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) idr_init(&clp->cl_stateids); atomic_set(&clp->cl_rpc_users, 0); clp->cl_cb_state = NFSD4_CB_UNKNOWN; + clp->cl_state = NFSD4_ACTIVE; + atomic_inc(&nn->nfs4_client_count); + atomic_set(&clp->cl_delegs_in_recall, 0); INIT_LIST_HEAD(&clp->cl_idhash); INIT_LIST_HEAD(&clp->cl_openowners); INIT_LIST_HEAD(&clp->cl_delegations); @@ -2049,6 +2206,7 @@ static __be32 mark_client_expired_locked(struct nfs4_client *clp) static void __destroy_client(struct nfs4_client *clp) { + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); int i; struct nfs4_openowner *oo; struct nfs4_delegation *dp; @@ -2092,6 +2250,8 @@ __destroy_client(struct nfs4_client *clp) nfsd4_shutdown_callback(clp); if (clp->cl_cb_conn.cb_xprt) svc_xprt_put(clp->cl_cb_conn.cb_xprt); + atomic_add_unless(&nn->nfs4_client_count, -1, 0); + nfsd4_dec_courtesy_client_count(nn, clp); free_client(clp); wake_up_all(&expiry_wq); } @@ -2316,13 +2476,28 @@ static struct nfs4_client *get_nfsdfs_clp(struct inode *inode) static void seq_quote_mem(struct seq_file *m, char *data, int len) { seq_printf(m, "\""); - seq_escape_mem_ascii(m, data, len); + seq_escape_mem(m, data, len, ESCAPE_HEX | ESCAPE_NAP | ESCAPE_APPEND, "\"\\"); seq_printf(m, "\""); } +static const char *cb_state2str(int state) +{ + switch (state) { + case NFSD4_CB_UP: + return "UP"; + case NFSD4_CB_UNKNOWN: + return "UNKNOWN"; + case NFSD4_CB_DOWN: + return "DOWN"; + case NFSD4_CB_FAULT: + return "FAULT"; + } + return "UNDEFINED"; +} + static int client_info_show(struct seq_file *m, void *v) { - struct inode *inode = m->private; + struct inode *inode = file_inode(m->file); struct nfs4_client *clp; u64 clid; @@ -2332,6 +2507,17 @@ static int client_info_show(struct seq_file *m, void *v) memcpy(&clid, &clp->cl_clientid, sizeof(clid)); seq_printf(m, "clientid: 0x%llx\n", clid); seq_printf(m, "address: \"%pISpc\"\n", (struct sockaddr *)&clp->cl_addr); + + if (clp->cl_state == NFSD4_COURTESY) + seq_puts(m, "status: courtesy\n"); + else if (clp->cl_state == NFSD4_EXPIRABLE) + seq_puts(m, "status: expirable\n"); + else if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags)) + seq_puts(m, "status: confirmed\n"); + else + seq_puts(m, "status: unconfirmed\n"); + seq_printf(m, "seconds from last renew: %lld\n", + ktime_get_boottime_seconds() - clp->cl_time); seq_printf(m, "name: "); seq_quote_mem(m, clp->cl_name.data, clp->cl_name.len); seq_printf(m, "\nminor version: %d\n", clp->cl_minorversion); @@ -2344,22 +2530,14 @@ static int client_info_show(struct seq_file *m, void *v) seq_printf(m, "\nImplementation time: [%lld, %ld]\n", clp->cl_nii_time.tv_sec, clp->cl_nii_time.tv_nsec); } + seq_printf(m, "callback state: %s\n", cb_state2str(clp->cl_cb_state)); + seq_printf(m, "callback address: %pISpc\n", &clp->cl_cb_conn.cb_addr); drop_client(clp); return 0; } -static int client_info_open(struct inode *inode, struct file *file) -{ - return single_open(file, client_info_show, inode); -} - -static const struct file_operations client_info_fops = { - .open = client_info_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(client_info); static void *states_start(struct seq_file *s, loff_t *pos) __acquires(&clp->cl_lock) @@ -2395,9 +2573,14 @@ static void states_stop(struct seq_file *s, void *v) spin_unlock(&clp->cl_lock); } +static void nfs4_show_fname(struct seq_file *s, struct nfsd_file *f) +{ + seq_printf(s, "filename: \"%pD2\"", f->nf_file); +} + static void nfs4_show_superblock(struct seq_file *s, struct nfsd_file *f) { - struct inode *inode = f->nf_inode; + struct inode *inode = file_inode(f->nf_file); seq_printf(s, "superblock: \"%02x:%02x:%ld\"", MAJOR(inode->i_sb->s_dev), @@ -2411,6 +2594,12 @@ static void nfs4_show_owner(struct seq_file *s, struct nfs4_stateowner *oo) seq_quote_mem(s, oo->so_owner.data, oo->so_owner.len); } +static void nfs4_show_stateid(struct seq_file *s, stateid_t *stid) +{ + seq_printf(s, "0x%.8x", stid->si_generation); + seq_printf(s, "%12phN", &stid->si_opaque); +} + static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st) { struct nfs4_ol_stateid *ols; @@ -2425,8 +2614,12 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st) oo = ols->st_stateowner; nf = st->sc_file; file = find_any_file(nf); + if (!file) + return 0; - seq_printf(s, "- 0x%16phN: { type: open, ", &st->sc_stateid); + seq_printf(s, "- "); + nfs4_show_stateid(s, &st->sc_stateid); + seq_printf(s, ": { type: open, "); access = bmap_to_share_mode(ols->st_access_bmap); deny = bmap_to_share_mode(ols->st_deny_bmap); @@ -2440,6 +2633,8 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st) nfs4_show_superblock(s, file); seq_printf(s, ", "); + nfs4_show_fname(s, file); + seq_printf(s, ", "); nfs4_show_owner(s, oo); seq_printf(s, " }\n"); nfsd_file_put(file); @@ -2458,8 +2653,12 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st) oo = ols->st_stateowner; nf = st->sc_file; file = find_any_file(nf); + if (!file) + return 0; - seq_printf(s, "- 0x%16phN: { type: lock, ", &st->sc_stateid); + seq_printf(s, "- "); + nfs4_show_stateid(s, &st->sc_stateid); + seq_printf(s, ": { type: lock, "); /* * Note: a lock stateid isn't really the same thing as a lock, @@ -2471,6 +2670,8 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st) nfs4_show_superblock(s, file); /* XXX: open stateid? */ seq_printf(s, ", "); + nfs4_show_fname(s, file); + seq_printf(s, ", "); nfs4_show_owner(s, oo); seq_printf(s, " }\n"); nfsd_file_put(file); @@ -2486,9 +2687,13 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st) ds = delegstateid(st); nf = st->sc_file; - file = nf->fi_deleg_file; + file = find_deleg_file(nf); + if (!file) + return 0; - seq_printf(s, "- 0x%16phN: { type: deleg, ", &st->sc_stateid); + seq_printf(s, "- "); + nfs4_show_stateid(s, &st->sc_stateid); + seq_printf(s, ": { type: deleg, "); /* Kinda dead code as long as we only support read delegs: */ seq_printf(s, "access: %s, ", @@ -2497,7 +2702,10 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st) /* XXX: lease time, whether it's being recalled. */ nfs4_show_superblock(s, file); + seq_printf(s, ", "); + nfs4_show_fname(s, file); seq_printf(s, " }\n"); + nfsd_file_put(file); return 0; } @@ -2510,11 +2718,15 @@ static int nfs4_show_layout(struct seq_file *s, struct nfs4_stid *st) ls = container_of(st, struct nfs4_layout_stateid, ls_stid); file = ls->ls_file; - seq_printf(s, "- 0x%16phN: { type: layout, ", &st->sc_stateid); + seq_printf(s, "- "); + nfs4_show_stateid(s, &st->sc_stateid); + seq_printf(s, ": { type: layout, "); /* XXX: What else would be useful? */ nfs4_show_superblock(s, file); + seq_printf(s, ", "); + nfs4_show_fname(s, file); seq_printf(s, " }\n"); return 0; @@ -2592,9 +2804,11 @@ static void force_expire_client(struct nfs4_client *clp) struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); bool already_expired; - spin_lock(&clp->cl_lock); + trace_nfsd_clid_admin_expired(&clp->cl_clientid); + + spin_lock(&nn->client_lock); clp->cl_time = 0; - spin_unlock(&clp->cl_lock); + spin_unlock(&nn->client_lock); wait_event(expiry_wq, atomic_read(&clp->cl_rpc_users) == 0); spin_lock(&nn->client_lock); @@ -2636,7 +2850,7 @@ static const struct file_operations client_ctl_fops = { static const struct tree_descr client_files[] = { [0] = {"info", &client_info_fops, S_IRUSR}, [1] = {"states", &client_states_fops, S_IRUSR}, - [2] = {"ctl", &client_ctl_fops, S_IRUSR|S_IWUSR}, + [2] = {"ctl", &client_ctl_fops, S_IWUSR}, [3] = {""}, }; @@ -2648,8 +2862,9 @@ static struct nfs4_client *create_client(struct xdr_netobj name, int ret; struct net *net = SVC_NET(rqstp); struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct dentry *dentries[ARRAY_SIZE(client_files)]; - clp = alloc_client(name); + clp = alloc_client(name, nn); if (clp == NULL) return NULL; @@ -2667,9 +2882,11 @@ static struct nfs4_client *create_client(struct xdr_netobj name, memcpy(&clp->cl_addr, sa, sizeof(struct sockaddr_storage)); clp->cl_cb_session = NULL; clp->net = net; - clp->cl_nfsd_dentry = nfsd_client_mkdir(nn, &clp->cl_nfsdfs, - clp->cl_clientid.cl_id - nn->clientid_base, - client_files); + clp->cl_nfsd_dentry = nfsd_client_mkdir( + nn, &clp->cl_nfsdfs, + clp->cl_clientid.cl_id - nn->clientid_base, + client_files, dentries); + clp->cl_nfsd_info_dentry = dentries[0]; if (!clp->cl_nfsd_dentry) { free_client(clp); return NULL; @@ -2740,11 +2957,11 @@ move_to_confirmed(struct nfs4_client *clp) lockdep_assert_held(&nn->client_lock); - dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp); list_move(&clp->cl_idhash, &nn->conf_id_hashtbl[idhashval]); rb_erase(&clp->cl_namenode, &nn->unconf_name_tree); add_clp_to_name_tree(clp, &nn->conf_name_tree); set_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags); + trace_nfsd_clid_confirmed(&clp->cl_clientid); renew_client_locked(clp); } @@ -2834,14 +3051,12 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_r conn->cb_prog = se->se_callback_prog; conn->cb_ident = se->se_callback_ident; memcpy(&conn->cb_saddr, &rqstp->rq_daddr, rqstp->rq_daddrlen); + trace_nfsd_cb_args(clp, conn); return; out_err: conn->cb_addr.ss_family = AF_UNSPEC; conn->cb_addrlen = 0; - dprintk("NFSD: this client (clientid %08x/%08x) " - "will not receive delegations\n", - clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); - + trace_nfsd_cb_nodelegs(clp); return; } @@ -2851,7 +3066,7 @@ out_err: static void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) { - struct xdr_buf *buf = resp->xdr.buf; + struct xdr_buf *buf = resp->xdr->buf; struct nfsd4_slot *slot = resp->cstate.slot; unsigned int base; @@ -2921,7 +3136,7 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, struct nfsd4_sequence *seq) { struct nfsd4_slot *slot = resp->cstate.slot; - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; __be32 status; @@ -3015,7 +3230,7 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, rpc_ntop(sa, addr_str, sizeof(addr_str)); dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p " - "ip_addr=%s flags %x, spa_how %d\n", + "ip_addr=%s flags %x, spa_how %u\n", __func__, rqstp, exid, exid->clname.len, exid->clname.data, addr_str, exid->flags, exid->spa_how); @@ -3062,11 +3277,12 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out_nolock; } new->cl_mach_cred = true; + break; case SP4_NONE: break; default: /* checked by xdr code */ WARN_ON_ONCE(1); - /* fall through */ + fallthrough; case SP4_SSV: status = nfserr_encr_alg_unsupp; goto out_nolock; @@ -3098,20 +3314,24 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } /* case 6 */ exid->flags |= EXCHGID4_FLAG_CONFIRMED_R; + trace_nfsd_clid_confirmed_r(conf); goto out_copy; } if (!creds_match) { /* case 3 */ if (client_has_state(conf)) { status = nfserr_clid_inuse; + trace_nfsd_clid_cred_mismatch(conf, rqstp); goto out; } goto out_new; } if (verfs_match) { /* case 2 */ conf->cl_exchange_flags |= EXCHGID4_FLAG_CONFIRMED_R; + trace_nfsd_clid_confirmed_r(conf); goto out_copy; } /* case 5, client reboot */ + trace_nfsd_clid_verf_mismatch(conf, rqstp, &verf); conf = NULL; goto out_new; } @@ -3121,16 +3341,19 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; } - unconf = find_unconfirmed_client_by_name(&exid->clname, nn); + unconf = find_unconfirmed_client_by_name(&exid->clname, nn); if (unconf) /* case 4, possible retry or client restart */ unhash_client_locked(unconf); - /* case 1 (normal case) */ + /* case 1, new owner ID */ + trace_nfsd_clid_fresh(new); + out_new: if (conf) { status = mark_client_expired_locked(conf); if (status) goto out; + trace_nfsd_clid_replaced(&conf->cl_clientid); } new->cl_minorversion = cstate->minorversion; new->cl_spo_must_allow.u.words[0] = exid->spo_must_allow[0]; @@ -3154,8 +3377,10 @@ out: out_nolock: if (new) expire_client(new); - if (unconf) + if (unconf) { + trace_nfsd_clid_expire_unconf(&unconf->cl_clientid); expire_client(unconf); + } return status; } @@ -3347,9 +3572,10 @@ nfsd4_create_session(struct svc_rqst *rqstp, goto out_free_conn; } } else if (unconf) { + status = nfserr_clid_inuse; if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) { - status = nfserr_clid_inuse; + trace_nfsd_clid_cred_mismatch(unconf, rqstp); goto out_free_conn; } status = nfserr_wrong_cred; @@ -3369,6 +3595,7 @@ nfsd4_create_session(struct svc_rqst *rqstp, old = NULL; goto out_free_conn; } + trace_nfsd_clid_replaced(&old->cl_clientid); } move_to_confirmed(unconf); conf = unconf; @@ -3393,6 +3620,8 @@ nfsd4_create_session(struct svc_rqst *rqstp, /* cache solo and embedded create sessions under the client_lock */ nfsd4_cache_create_session(cr_ses, cs_slot, status); spin_unlock(&nn->client_lock); + if (conf == unconf) + fsnotify_dentry(conf->cl_nfsd_info_dentry, FS_MODIFY); /* init connection and backchannel */ nfsd4_init_conn(rqstp, conn, new); nfsd4_put_session(new); @@ -3447,6 +3676,47 @@ __be32 nfsd4_backchannel_ctl(struct svc_rqst *rqstp, return nfs_ok; } +static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_session *s) +{ + struct nfsd4_conn *c; + + list_for_each_entry(c, &s->se_conns, cn_persession) { + if (c->cn_xprt == xpt) { + return c; + } + } + return NULL; +} + +static __be32 nfsd4_match_existing_connection(struct svc_rqst *rqst, + struct nfsd4_session *session, u32 req, struct nfsd4_conn **conn) +{ + struct nfs4_client *clp = session->se_client; + struct svc_xprt *xpt = rqst->rq_xprt; + struct nfsd4_conn *c; + __be32 status; + + /* Following the last paragraph of RFC 5661 Section 18.34.3: */ + spin_lock(&clp->cl_lock); + c = __nfsd4_find_conn(xpt, session); + if (!c) + status = nfserr_noent; + else if (req == c->cn_flags) + status = nfs_ok; + else if (req == NFS4_CDFC4_FORE_OR_BOTH && + c->cn_flags != NFS4_CDFC4_BACK) + status = nfs_ok; + else if (req == NFS4_CDFC4_BACK_OR_BOTH && + c->cn_flags != NFS4_CDFC4_FORE) + status = nfs_ok; + else + status = nfserr_inval; + spin_unlock(&clp->cl_lock); + if (status == nfs_ok && conn) + *conn = c; + return status; +} + __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) @@ -3468,6 +3738,17 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, status = nfserr_wrong_cred; if (!nfsd4_mach_creds_match(session->se_client, rqstp)) goto out; + status = nfsd4_match_existing_connection(rqstp, session, + bcts->dir, &conn); + if (status == nfs_ok) { + if (bcts->dir == NFS4_CDFC4_FORE_OR_BOTH || + bcts->dir == NFS4_CDFC4_BACK) + conn->cn_flags |= NFS4_CDFC4_BACK; + nfsd4_probe_callback(session->se_client); + goto out; + } + if (status == nfserr_inval) + goto out; status = nfsd4_map_bcts_dir(&bcts->dir); if (status) goto out; @@ -3533,18 +3814,6 @@ out: return status; } -static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_session *s) -{ - struct nfsd4_conn *c; - - list_for_each_entry(c, &s->se_conns, cn_persession) { - if (c->cn_xprt == xpt) { - return c; - } - } - return NULL; -} - static __be32 nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses) { struct nfs4_client *clp = ses->se_client; @@ -3626,7 +3895,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { struct nfsd4_sequence *seq = &u->sequence; struct nfsd4_compoundres *resp = rqstp->rq_resp; - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; struct nfsd4_session *session; struct nfs4_client *clp; struct nfsd4_slot *slot; @@ -3796,6 +4065,7 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, status = nfserr_wrong_cred; goto out; } + trace_nfsd_clid_destroyed(&clp->cl_clientid); unhash_client_locked(clp); out: spin_unlock(&nn->client_lock); @@ -3809,6 +4079,7 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_reclaim_complete *rc = &u->reclaim_complete; + struct nfs4_client *clp = cstate->clp; __be32 status = 0; if (rc->rca_one_fs) { @@ -3822,12 +4093,11 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, } status = nfserr_complete_already; - if (test_and_set_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, - &cstate->session->se_client->cl_flags)) + if (test_and_set_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &clp->cl_flags)) goto out; status = nfserr_stale_clientid; - if (is_client_expired(cstate->session->se_client)) + if (is_client_expired(clp)) /* * The following error isn't really legal. * But we only get here if the client just explicitly @@ -3838,8 +4108,9 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, goto out; status = nfs_ok; - nfsd4_client_record_create(cstate->session->se_client); - inc_reclaim_complete(cstate->session->se_client); + trace_nfsd_clid_reclaim_complete(&clp->cl_clientid); + nfsd4_client_record_create(clp); + inc_reclaim_complete(clp); out: return status; } @@ -3859,32 +4130,29 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, new = create_client(clname, rqstp, &clverifier); if (new == NULL) return nfserr_jukebox; - /* Cases below refer to rfc 3530 section 14.2.33: */ spin_lock(&nn->client_lock); conf = find_confirmed_client_by_name(&clname, nn); if (conf && client_has_state(conf)) { - /* case 0: */ status = nfserr_clid_inuse; if (clp_used_exchangeid(conf)) goto out; if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) { - char addr_str[INET6_ADDRSTRLEN]; - rpc_ntop((struct sockaddr *) &conf->cl_addr, addr_str, - sizeof(addr_str)); - dprintk("NFSD: setclientid: string in use by client " - "at %s\n", addr_str); + trace_nfsd_clid_cred_mismatch(conf, rqstp); goto out; } } unconf = find_unconfirmed_client_by_name(&clname, nn); if (unconf) unhash_client_locked(unconf); - if (conf && same_verf(&conf->cl_verifier, &clverifier)) { - /* case 1: probable callback update */ - copy_clid(new, conf); - gen_confirm(new, nn); - } else /* case 4 (new client) or cases 2, 3 (client reboot): */ - ; + if (conf) { + if (same_verf(&conf->cl_verifier, &clverifier)) { + copy_clid(new, conf); + gen_confirm(new, nn); + } else + trace_nfsd_clid_verf_mismatch(conf, rqstp, + &clverifier); + } else + trace_nfsd_clid_fresh(new); new->cl_minorversion = 0; gen_callback(new, setclid, rqstp); add_to_unconfirmed(new); @@ -3897,12 +4165,13 @@ out: spin_unlock(&nn->client_lock); if (new) free_client(new); - if (unconf) + if (unconf) { + trace_nfsd_clid_expire_unconf(&unconf->cl_clientid); expire_client(unconf); + } return status; } - __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, @@ -3931,43 +4200,50 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, * Nevertheless, RFC 7530 recommends INUSE for this case: */ status = nfserr_clid_inuse; - if (unconf && !same_creds(&unconf->cl_cred, &rqstp->rq_cred)) + if (unconf && !same_creds(&unconf->cl_cred, &rqstp->rq_cred)) { + trace_nfsd_clid_cred_mismatch(unconf, rqstp); goto out; - if (conf && !same_creds(&conf->cl_cred, &rqstp->rq_cred)) + } + if (conf && !same_creds(&conf->cl_cred, &rqstp->rq_cred)) { + trace_nfsd_clid_cred_mismatch(conf, rqstp); goto out; - /* cases below refer to rfc 3530 section 14.2.34: */ + } if (!unconf || !same_verf(&confirm, &unconf->cl_confirm)) { if (conf && same_verf(&confirm, &conf->cl_confirm)) { - /* case 2: probable retransmit */ status = nfs_ok; - } else /* case 4: client hasn't noticed we rebooted yet? */ + } else status = nfserr_stale_clientid; goto out; } status = nfs_ok; - if (conf) { /* case 1: callback update */ + if (conf) { old = unconf; unhash_client_locked(old); nfsd4_change_callback(conf, &unconf->cl_cb_conn); - } else { /* case 3: normal case; new or rebooted client */ + } else { old = find_confirmed_client_by_name(&unconf->cl_name, nn); if (old) { status = nfserr_clid_inuse; if (client_has_state(old) && !same_creds(&unconf->cl_cred, - &old->cl_cred)) + &old->cl_cred)) { + old = NULL; goto out; + } status = mark_client_expired_locked(old); if (status) { old = NULL; goto out; } + trace_nfsd_clid_replaced(&old->cl_clientid); } move_to_confirmed(unconf); conf = unconf; } get_client_locked(conf); spin_unlock(&nn->client_lock); + if (conf == unconf) + fsnotify_dentry(conf->cl_nfsd_info_dentry, FS_MODIFY); nfsd4_probe_callback(conf); spin_lock(&nn->client_lock); put_client_renew_locked(conf); @@ -3984,7 +4260,7 @@ static struct nfs4_file *nfsd4_alloc_file(void) } /* OPEN Share state helper functions */ -static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval, +static void nfsd4_init_file(struct svc_fh *fh, unsigned int hashval, struct nfs4_file *fp) { lockdep_assert_held(&state_lock); @@ -3994,12 +4270,14 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval, INIT_LIST_HEAD(&fp->fi_stateids); INIT_LIST_HEAD(&fp->fi_delegations); INIT_LIST_HEAD(&fp->fi_clnt_odstate); - fh_copy_shallow(&fp->fi_fhandle, fh); + fh_copy_shallow(&fp->fi_fhandle, &fh->fh_handle); fp->fi_deleg_file = NULL; fp->fi_had_conflict = false; fp->fi_share_deny = 0; memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); memset(fp->fi_access, 0, sizeof(fp->fi_access)); + fp->fi_aliased = false; + fp->fi_inode = d_inode(fh->fh_dentry); #ifdef CONFIG_NFSD_PNFS INIT_LIST_HEAD(&fp->fi_lo_states); atomic_set(&fp->fi_lo_recalls, 0); @@ -4065,10 +4343,62 @@ out_free_openowner_slab: out_free_client_slab: kmem_cache_destroy(client_slab); out: - dprintk("nfsd4: out of memory while initializing nfsv4\n"); return -ENOMEM; } +static unsigned long +nfsd_courtesy_client_count(struct shrinker *shrink, struct shrink_control *sc) +{ + int cnt; + struct nfsd_net *nn = container_of(shrink, + struct nfsd_net, nfsd_client_shrinker); + + cnt = atomic_read(&nn->nfsd_courtesy_clients); + if (cnt > 0) + mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0); + return (unsigned long)cnt; +} + +static unsigned long +nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc) +{ + return SHRINK_STOP; +} + +int +nfsd4_init_leases_net(struct nfsd_net *nn) +{ + struct sysinfo si; + u64 max_clients; + + nn->nfsd4_lease = 90; /* default lease time */ + nn->nfsd4_grace = 90; + nn->somebody_reclaimed = false; + nn->track_reclaim_completes = false; + nn->clverifier_counter = get_random_u32(); + nn->clientid_base = get_random_u32(); + nn->clientid_counter = nn->clientid_base + 1; + nn->s2s_cp_cl_id = nn->clientid_counter++; + + atomic_set(&nn->nfs4_client_count, 0); + si_meminfo(&si); + max_clients = (u64)si.totalram * si.mem_unit / (1024 * 1024 * 1024); + max_clients *= NFS4_CLIENTS_PER_GB; + nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB); + + atomic_set(&nn->nfsd_courtesy_clients, 0); + nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan; + nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count; + nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS; + return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client"); +} + +void +nfsd4_leases_net_shutdown(struct nfsd_net *nn) +{ + unregister_shrinker(&nn->nfsd_client_shrinker); +} + static void init_nfs4_replay(struct nfs4_replay *rp) { rp->rp_status = nfserr_serverfault; @@ -4339,12 +4669,13 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net) /* search file_hashtbl[] for file */ static struct nfs4_file * -find_file_locked(struct knfsd_fh *fh, unsigned int hashval) +find_file_locked(struct svc_fh *fh, unsigned int hashval) { struct nfs4_file *fp; - hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash) { - if (fh_match(&fp->fi_fhandle, fh)) { + hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash, + lockdep_is_held(&state_lock)) { + if (fh_match(&fp->fi_fhandle, &fh->fh_handle)) { if (refcount_inc_not_zero(&fp->fi_ref)) return fp; } @@ -4352,8 +4683,32 @@ find_file_locked(struct knfsd_fh *fh, unsigned int hashval) return NULL; } -struct nfs4_file * -find_file(struct knfsd_fh *fh) +static struct nfs4_file *insert_file(struct nfs4_file *new, struct svc_fh *fh, + unsigned int hashval) +{ + struct nfs4_file *fp; + struct nfs4_file *ret = NULL; + bool alias_found = false; + + spin_lock(&state_lock); + hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash, + lockdep_is_held(&state_lock)) { + if (fh_match(&fp->fi_fhandle, &fh->fh_handle)) { + if (refcount_inc_not_zero(&fp->fi_ref)) + ret = fp; + } else if (d_inode(fh->fh_dentry) == fp->fi_inode) + fp->fi_aliased = alias_found = true; + } + if (likely(ret == NULL)) { + nfsd4_init_file(fh, hashval, new); + new->fi_aliased = alias_found; + ret = new; + } + spin_unlock(&state_lock); + return ret; +} + +static struct nfs4_file * find_file(struct svc_fh *fh) { struct nfs4_file *fp; unsigned int hashval = file_hashval(fh); @@ -4365,7 +4720,7 @@ find_file(struct knfsd_fh *fh) } static struct nfs4_file * -find_or_add_file(struct nfs4_file *new, struct knfsd_fh *fh) +find_or_add_file(struct nfs4_file *new, struct svc_fh *fh) { struct nfs4_file *fp; unsigned int hashval = file_hashval(fh); @@ -4376,15 +4731,7 @@ find_or_add_file(struct nfs4_file *new, struct knfsd_fh *fh) if (fp) return fp; - spin_lock(&state_lock); - fp = find_file_locked(fh, hashval); - if (likely(fp == NULL)) { - nfsd4_init_file(fh, hashval, new); - fp = new; - } - spin_unlock(&state_lock); - - return fp; + return insert_file(new, fh, hashval); } /* @@ -4397,7 +4744,7 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) struct nfs4_file *fp; __be32 ret = nfs_ok; - fp = find_file(¤t_fh->fh_handle); + fp = find_file(current_fh); if (!fp) return ret; /* Check for conflicting share reservations */ @@ -4409,6 +4756,35 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) return ret; } +static bool nfsd4_deleg_present(const struct inode *inode) +{ + struct file_lock_context *ctx = smp_load_acquire(&inode->i_flctx); + + return ctx && !list_empty_careful(&ctx->flc_lease); +} + +/** + * nfsd_wait_for_delegreturn - wait for delegations to be returned + * @rqstp: the RPC transaction being executed + * @inode: in-core inode of the file being waited for + * + * The timeout prevents deadlock if all nfsd threads happen to be + * tied up waiting for returning delegations. + * + * Return values: + * %true: delegation was returned + * %false: timed out waiting for delegreturn + */ +bool nfsd_wait_for_delegreturn(struct svc_rqst *rqstp, struct inode *inode) +{ + long __maybe_unused timeo; + + timeo = wait_var_event_timeout(inode, !nfsd4_deleg_present(inode), + NFSD_DELEGRETURN_TIMEOUT); + trace_nfsd_delegret_wakeup(rqstp, inode, timeo); + return timeo > 0; +} + static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb) { struct nfs4_delegation *dp = cb_to_delegation(cb); @@ -4425,7 +4801,7 @@ static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb) * queued for a lease break. Don't queue it again. */ spin_lock(&state_lock); - if (dp->dl_time == 0) { + if (delegation_hashed(dp) && dp->dl_time == 0) { dp->dl_time = ktime_get_boottime_seconds(); list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru); } @@ -4437,7 +4813,10 @@ static int nfsd4_cb_recall_done(struct nfsd4_callback *cb, { struct nfs4_delegation *dp = cb_to_delegation(cb); - if (dp->dl_stid.sc_type == NFS4_CLOSED_DELEG_STID) + trace_nfsd_cb_recall_done(&dp->dl_stid.sc_stateid, task); + + if (dp->dl_stid.sc_type == NFS4_CLOSED_DELEG_STID || + dp->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID) return 1; switch (task->tk_status) { @@ -4456,7 +4835,7 @@ static int nfsd4_cb_recall_done(struct nfsd4_callback *cb, rpc_delay(task, 2 * HZ); return 0; } - /*FALLTHRU*/ + fallthrough; default: return 1; } @@ -4481,20 +4860,30 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp) * We're assuming the state code never drops its reference * without first removing the lease. Since we're in this lease * callback (and since the lease code is serialized by the - * i_lock) we know the server hasn't removed the lease yet, and + * flc_lock) we know the server hasn't removed the lease yet, and * we know it's safe to take a reference. */ refcount_inc(&dp->dl_stid.sc_count); - nfsd4_run_cb(&dp->dl_recall); + WARN_ON_ONCE(!nfsd4_run_cb(&dp->dl_recall)); } -/* Called from break_lease() with i_lock held. */ +/* Called from break_lease() with flc_lock held. */ static bool nfsd_break_deleg_cb(struct file_lock *fl) { - bool ret = false; struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner; struct nfs4_file *fp = dp->dl_stid.sc_file; + struct nfs4_client *clp = dp->dl_stid.sc_client; + struct nfsd_net *nn; + + trace_nfsd_cb_recall(&dp->dl_stid); + + dp->dl_recalled = true; + atomic_inc(&clp->cl_delegs_in_recall); + if (try_to_expire_client(clp)) { + nn = net_generic(clp->net, nfsd_net_id); + mod_delayed_work(laundry_wq, &nn->laundromat_work, 0); + } /* * We don't want the locks code to timeout the lease for us; @@ -4507,20 +4896,50 @@ nfsd_break_deleg_cb(struct file_lock *fl) fp->fi_had_conflict = true; nfsd_break_one_deleg(dp); spin_unlock(&fp->fi_lock); - return ret; + return false; +} + +/** + * nfsd_breaker_owns_lease - Check if lease conflict was resolved + * @fl: Lock state to check + * + * Return values: + * %true: Lease conflict was resolved + * %false: Lease conflict was not resolved. + */ +static bool nfsd_breaker_owns_lease(struct file_lock *fl) +{ + struct nfs4_delegation *dl = fl->fl_owner; + struct svc_rqst *rqst; + struct nfs4_client *clp; + + if (!i_am_nfsd()) + return false; + rqst = kthread_data(current); + /* Note rq_prog == NFS_ACL_PROGRAM is also possible: */ + if (rqst->rq_prog != NFS_PROGRAM || rqst->rq_vers < 4) + return false; + clp = *(rqst->rq_lease_breaker); + return dl->dl_stid.sc_client == clp; } static int nfsd_change_deleg_cb(struct file_lock *onlist, int arg, struct list_head *dispose) { - if (arg & F_UNLCK) + struct nfs4_delegation *dp = (struct nfs4_delegation *)onlist->fl_owner; + struct nfs4_client *clp = dp->dl_stid.sc_client; + + if (arg & F_UNLCK) { + if (dp->dl_recalled) + atomic_dec(&clp->cl_delegs_in_recall); return lease_modify(onlist, arg, dispose); - else + } else return -EAGAIN; } static const struct lock_manager_operations nfsd_lease_mng_ops = { + .lm_breaker_owns_lease = nfsd_breaker_owns_lease, .lm_break = nfsd_break_deleg_cb, .lm_change = nfsd_change_deleg_cb, }; @@ -4536,40 +4955,37 @@ static __be32 nfsd4_check_seqid(struct nfsd4_compound_state *cstate, struct nfs4 return nfserr_bad_seqid; } -static __be32 lookup_clientid(clientid_t *clid, - struct nfsd4_compound_state *cstate, - struct nfsd_net *nn, - bool sessions) +static struct nfs4_client *lookup_clientid(clientid_t *clid, bool sessions, + struct nfsd_net *nn) { struct nfs4_client *found; + spin_lock(&nn->client_lock); + found = find_confirmed_client(clid, sessions, nn); + if (found) + atomic_inc(&found->cl_rpc_users); + spin_unlock(&nn->client_lock); + return found; +} + +static __be32 set_client(clientid_t *clid, + struct nfsd4_compound_state *cstate, + struct nfsd_net *nn) +{ if (cstate->clp) { - found = cstate->clp; - if (!same_clid(&found->cl_clientid, clid)) + if (!same_clid(&cstate->clp->cl_clientid, clid)) return nfserr_stale_clientid; return nfs_ok; } - if (STALE_CLIENTID(clid, nn)) return nfserr_stale_clientid; - /* - * For v4.1+ we get the client in the SEQUENCE op. If we don't have one - * cached already then we know this is for is for v4.0 and "sessions" - * will be false. + * We're in the 4.0 case (otherwise the SEQUENCE op would have + * set cstate->clp), so session = false: */ - WARN_ON_ONCE(cstate->session); - spin_lock(&nn->client_lock); - found = find_confirmed_client(clid, sessions, nn); - if (!found) { - spin_unlock(&nn->client_lock); + cstate->clp = lookup_clientid(clid, false, nn); + if (!cstate->clp) return nfserr_expired; - } - atomic_inc(&found->cl_rpc_users); - spin_unlock(&nn->client_lock); - - /* Cache the nfs4_client in cstate! */ - cstate->clp = found; return nfs_ok; } @@ -4583,8 +4999,6 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, struct nfs4_openowner *oo = NULL; __be32 status; - if (STALE_CLIENTID(&open->op_clientid, nn)) - return nfserr_stale_clientid; /* * In case we need it later, after we've already created the * file and don't want to risk a further failure: @@ -4593,7 +5007,7 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, if (open->op_file == NULL) return nfserr_jukebox; - status = lookup_clientid(clientid, cstate, nn, false); + status = set_client(clientid, cstate, nn); if (status) return status; clp = cstate->clp; @@ -4717,16 +5131,19 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh, .ia_valid = ATTR_SIZE, .ia_size = 0, }; + struct nfsd_attrs attrs = { + .na_iattr = &iattr, + }; if (!open->op_truncate) return 0; if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE)) return nfserr_inval; - return nfsd_setattr(rqstp, fh, &iattr, 0, (time64_t)0); + return nfsd_setattr(rqstp, fh, &attrs, 0, (time64_t)0); } static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, - struct nfsd4_open *open) + struct nfsd4_open *open, bool new_stp) { struct nfsd_file *nf = NULL; __be32 status; @@ -4742,6 +5159,13 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, */ status = nfs4_file_check_deny(fp, open->op_share_deny); if (status != nfs_ok) { + if (status != nfserr_share_denied) { + spin_unlock(&fp->fi_lock); + goto out; + } + if (nfs4_resolve_deny_conflicts_locked(fp, new_stp, + stp, open->op_share_deny, false)) + status = nfserr_jukebox; spin_unlock(&fp->fi_lock); goto out; } @@ -4749,6 +5173,13 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, /* set access to the file */ status = nfs4_file_get_access(fp, open->op_share_access); if (status != nfs_ok) { + if (status != nfserr_share_denied) { + spin_unlock(&fp->fi_lock); + goto out; + } + if (nfs4_resolve_deny_conflicts_locked(fp, new_stp, + stp, open->op_share_access, true)) + status = nfserr_jukebox; spin_unlock(&fp->fi_lock); goto out; } @@ -4764,9 +5195,20 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, if (!fp->fi_fds[oflag]) { spin_unlock(&fp->fi_lock); - status = nfsd_file_acquire(rqstp, cur_fh, access, &nf); - if (status) - goto out_put_access; + + if (!open->op_filp) { + status = nfsd_file_acquire(rqstp, cur_fh, access, &nf); + if (status != nfs_ok) + goto out_put_access; + } else { + status = nfsd_file_create(rqstp, cur_fh, access, &nf); + if (status != nfs_ok) + goto out_put_access; + nf->nf_file = open->op_filp; + open->op_filp = NULL; + trace_nfsd_file_create(rqstp, access, nf); + } + spin_lock(&fp->fi_lock); if (!fp->fi_fds[oflag]) { fp->fi_fds[oflag] = nf; @@ -4777,6 +5219,11 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, if (nf) nfsd_file_put(nf); + status = nfserrno(nfsd_open_break_lease(cur_fh->fh_dentry->d_inode, + access)); + if (status) + goto out_put_access; + status = nfsd4_truncate(rqstp, cur_fh, open); if (status) goto out_put_access; @@ -4790,21 +5237,29 @@ out_put_access: } static __be32 -nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open) +nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, + struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, + struct nfsd4_open *open) { __be32 status; unsigned char old_deny_bmap = stp->st_deny_bmap; if (!test_access(open->op_share_access, stp)) - return nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open); + return nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open, false); /* test and set deny mode */ spin_lock(&fp->fi_lock); status = nfs4_file_check_deny(fp, open->op_share_deny); if (status == nfs_ok) { - set_deny(open->op_share_deny, stp); - fp->fi_share_deny |= + if (status != nfserr_share_denied) { + set_deny(open->op_share_deny, stp); + fp->fi_share_deny |= (open->op_share_deny & NFS4_SHARE_DENY_BOTH); + } else { + if (nfs4_resolve_deny_conflicts_locked(fp, false, + stp, open->op_share_deny, false)) + status = nfserr_jukebox; + } } spin_unlock(&fp->fi_lock); @@ -4848,11 +5303,101 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, return fl; } +static int nfsd4_check_conflicting_opens(struct nfs4_client *clp, + struct nfs4_file *fp) +{ + struct nfs4_ol_stateid *st; + struct file *f = fp->fi_deleg_file->nf_file; + struct inode *ino = locks_inode(f); + int writes; + + writes = atomic_read(&ino->i_writecount); + if (!writes) + return 0; + /* + * There could be multiple filehandles (hence multiple + * nfs4_files) referencing this file, but that's not too + * common; let's just give up in that case rather than + * trying to go look up all the clients using that other + * nfs4_file as well: + */ + if (fp->fi_aliased) + return -EAGAIN; + /* + * If there's a close in progress, make sure that we see it + * clear any fi_fds[] entries before we see it decrement + * i_writecount: + */ + smp_mb__after_atomic(); + + if (fp->fi_fds[O_WRONLY]) + writes--; + if (fp->fi_fds[O_RDWR]) + writes--; + if (writes > 0) + return -EAGAIN; /* There may be non-NFSv4 writers */ + /* + * It's possible there are non-NFSv4 write opens in progress, + * but if they haven't incremented i_writecount yet then they + * also haven't called break lease yet; so, they'll break this + * lease soon enough. So, all that's left to check for is NFSv4 + * opens: + */ + spin_lock(&fp->fi_lock); + list_for_each_entry(st, &fp->fi_stateids, st_perfile) { + if (st->st_openstp == NULL /* it's an open */ && + access_permit_write(st) && + st->st_stid.sc_client != clp) { + spin_unlock(&fp->fi_lock); + return -EAGAIN; + } + } + spin_unlock(&fp->fi_lock); + /* + * There's a small chance that we could be racing with another + * NFSv4 open. However, any open that hasn't added itself to + * the fi_stateids list also hasn't called break_lease yet; so, + * they'll break this lease soon enough. + */ + return 0; +} + +/* + * It's possible that between opening the dentry and setting the delegation, + * that it has been renamed or unlinked. Redo the lookup to verify that this + * hasn't happened. + */ +static int +nfsd4_verify_deleg_dentry(struct nfsd4_open *open, struct nfs4_file *fp, + struct svc_fh *parent) +{ + struct svc_export *exp; + struct dentry *child; + __be32 err; + + err = nfsd_lookup_dentry(open->op_rqstp, parent, + open->op_fname, open->op_fnamelen, + &exp, &child); + + if (err) + return -EAGAIN; + + exp_put(exp); + dput(child); + if (child != file_dentry(fp->fi_deleg_file->nf_file)) + return -EAGAIN; + + return 0; +} + static struct nfs4_delegation * -nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, - struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate) +nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, + struct svc_fh *parent) { int status = 0; + struct nfs4_client *clp = stp->st_stid.sc_client; + struct nfs4_file *fp = stp->st_stid.sc_file; + struct nfs4_clnt_odstate *odstate = stp->st_clnt_odstate; struct nfs4_delegation *dp; struct nfsd_file *nf; struct file_lock *fl; @@ -4867,9 +5412,12 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, nf = find_readable_file(fp); if (!nf) { - /* We should always have a readable file here */ - WARN_ON_ONCE(1); - return ERR_PTR(-EBADF); + /* + * We probably could attempt another open and get a read + * delegation, but for now, don't bother until the + * client actually sends us one. + */ + return ERR_PTR(-EAGAIN); } spin_lock(&state_lock); spin_lock(&fp->fi_lock); @@ -4891,7 +5439,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, return ERR_PTR(status); status = -ENOMEM; - dp = alloc_init_deleg(clp, fp, fh, odstate); + dp = alloc_init_deleg(clp, fp, odstate); if (!dp) goto out_delegees; @@ -4905,6 +5453,16 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, if (status) goto out_clnt_odstate; + if (parent) { + status = nfsd4_verify_deleg_dentry(open, fp, parent); + if (status) + goto out_unlock; + } + + status = nfsd4_check_conflicting_opens(clp, fp); + if (status) + goto out_unlock; + spin_lock(&state_lock); spin_lock(&fp->fi_lock); if (fp->fi_had_conflict) @@ -4956,12 +5514,13 @@ static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) * proper support for them. */ static void -nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, - struct nfs4_ol_stateid *stp) +nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, + struct svc_fh *currentfh) { struct nfs4_delegation *dp; struct nfs4_openowner *oo = openowner(stp->st_stateowner); struct nfs4_client *clp = stp->st_stid.sc_client; + struct svc_fh *parent = NULL; int cb_up; int status = 0; @@ -4975,6 +5534,8 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, goto out_no_deleg; break; case NFS4_OPEN_CLAIM_NULL: + parent = currentfh; + fallthrough; case NFS4_OPEN_CLAIM_FH: /* * Let's not give out any delegations till everyone's @@ -4985,29 +5546,17 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, goto out_no_deleg; if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED)) goto out_no_deleg; - /* - * Also, if the file was opened for write or - * create, there's a good chance the client's - * about to write to it, resulting in an - * immediate recall (since we don't support - * write delegations): - */ - if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) - goto out_no_deleg; - if (open->op_create == NFS4_OPEN_CREATE) - goto out_no_deleg; break; default: goto out_no_deleg; } - dp = nfs4_set_delegation(clp, fh, stp->st_stid.sc_file, stp->st_clnt_odstate); + dp = nfs4_set_delegation(open, stp, parent); if (IS_ERR(dp)) goto out_no_deleg; memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid)); - dprintk("NFSD: delegation stateid=" STATEID_FMT "\n", - STATEID_VAL(&dp->dl_stid.sc_stateid)); + trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid); open->op_delegate_type = NFS4_OPEN_DELEGATE_READ; nfs4_put_stid(&dp->dl_stid); return; @@ -5043,6 +5592,18 @@ static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open, */ } +/** + * nfsd4_process_open2 - finish open processing + * @rqstp: the RPC transaction being executed + * @current_fh: NFSv4 COMPOUND's current filehandle + * @open: OPEN arguments + * + * If successful, (1) truncate the file if open->op_truncate was + * set, (2) set open->op_stateid, (3) set open->op_delegation. + * + * Returns %nfs_ok on success; otherwise an nfs4stat value in + * network byte order is returned. + */ __be32 nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) { @@ -5059,7 +5620,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf * and check for delegations in the process of being recalled. * If not found, create the nfs4_file struct */ - fp = find_or_add_file(open->op_file, ¤t_fh->fh_handle); + fp = find_or_add_file(open->op_file, current_fh); if (fp != open->op_file) { status = nfs4_check_deleg(cl, open, &dp); if (status) @@ -5092,7 +5653,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf goto out; } } else { - status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open); + status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open, true); if (status) { stp->st_stid.sc_type = NFS4_CLOSED_STID; release_open_stateid(stp); @@ -5121,12 +5682,10 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf * Attempt to hand out a delegation. No error return, because the * OPEN succeeds even if we fail. */ - nfs4_open_delegation(current_fh, open, stp); + nfs4_open_delegation(open, stp, &resp->cstate.current_fh); nodeleg: status = nfs_ok; - - dprintk("%s: stateid=" STATEID_FMT "\n", __func__, - STATEID_VAL(&stp->st_stid.sc_stateid)); + trace_nfsd_open(&stp->st_stid.sc_stateid); out: /* 4.1 client trying to upgrade/downgrade delegation? */ if (open->op_delegate_type == NFS4_OPEN_DELEGATE_NONE && dp && @@ -5180,19 +5739,15 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __be32 status; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - dprintk("process_renew(%08x/%08x): starting\n", - clid->cl_boot, clid->cl_id); - status = lookup_clientid(clid, cstate, nn, false); + trace_nfsd_clid_renew(clid); + status = set_client(clid, cstate, nn); if (status) - goto out; + return status; clp = cstate->clp; - status = nfserr_cb_path_down; if (!list_empty(&clp->cl_delegations) && clp->cl_cb_state != NFSD4_CB_UP) - goto out; - status = nfs_ok; -out: - return status; + return nfserr_cb_path_down; + return nfs_ok; } void @@ -5202,6 +5757,7 @@ nfsd4_end_grace(struct nfsd_net *nn) if (nn->grace_ended) return; + trace_nfsd_grace_complete(nn); nn->grace_ended = true; /* * If the server goes down again right now, an NFSv4 @@ -5252,71 +5808,245 @@ static bool clients_still_reclaiming(struct nfsd_net *nn) return true; } +struct laundry_time { + time64_t cutoff; + time64_t new_timeo; +}; + +static bool state_expired(struct laundry_time *lt, time64_t last_refresh) +{ + time64_t time_remaining; + + if (last_refresh < lt->cutoff) + return true; + time_remaining = last_refresh - lt->cutoff; + lt->new_timeo = min(lt->new_timeo, time_remaining); + return false; +} + +#ifdef CONFIG_NFSD_V4_2_INTER_SSC +void nfsd4_ssc_init_umount_work(struct nfsd_net *nn) +{ + spin_lock_init(&nn->nfsd_ssc_lock); + INIT_LIST_HEAD(&nn->nfsd_ssc_mount_list); + init_waitqueue_head(&nn->nfsd_ssc_waitq); +} +EXPORT_SYMBOL_GPL(nfsd4_ssc_init_umount_work); + +/* + * This is called when nfsd is being shutdown, after all inter_ssc + * cleanup were done, to destroy the ssc delayed unmount list. + */ +static void nfsd4_ssc_shutdown_umount(struct nfsd_net *nn) +{ + struct nfsd4_ssc_umount_item *ni = NULL; + struct nfsd4_ssc_umount_item *tmp; + + spin_lock(&nn->nfsd_ssc_lock); + list_for_each_entry_safe(ni, tmp, &nn->nfsd_ssc_mount_list, nsui_list) { + list_del(&ni->nsui_list); + spin_unlock(&nn->nfsd_ssc_lock); + mntput(ni->nsui_vfsmount); + kfree(ni); + spin_lock(&nn->nfsd_ssc_lock); + } + spin_unlock(&nn->nfsd_ssc_lock); +} + +static void nfsd4_ssc_expire_umount(struct nfsd_net *nn) +{ + bool do_wakeup = false; + struct nfsd4_ssc_umount_item *ni = NULL; + struct nfsd4_ssc_umount_item *tmp; + + spin_lock(&nn->nfsd_ssc_lock); + list_for_each_entry_safe(ni, tmp, &nn->nfsd_ssc_mount_list, nsui_list) { + if (time_after(jiffies, ni->nsui_expire)) { + if (refcount_read(&ni->nsui_refcnt) > 1) + continue; + + /* mark being unmount */ + ni->nsui_busy = true; + spin_unlock(&nn->nfsd_ssc_lock); + mntput(ni->nsui_vfsmount); + spin_lock(&nn->nfsd_ssc_lock); + + /* waiters need to start from begin of list */ + list_del(&ni->nsui_list); + kfree(ni); + + /* wakeup ssc_connect waiters */ + do_wakeup = true; + continue; + } + break; + } + if (do_wakeup) + wake_up_all(&nn->nfsd_ssc_waitq); + spin_unlock(&nn->nfsd_ssc_lock); +} +#endif + +/* Check if any lock belonging to this lockowner has any blockers */ +static bool +nfs4_lockowner_has_blockers(struct nfs4_lockowner *lo) +{ + struct file_lock_context *ctx; + struct nfs4_ol_stateid *stp; + struct nfs4_file *nf; + + list_for_each_entry(stp, &lo->lo_owner.so_stateids, st_perstateowner) { + nf = stp->st_stid.sc_file; + ctx = nf->fi_inode->i_flctx; + if (!ctx) + continue; + if (locks_owner_has_blockers(ctx, lo)) + return true; + } + return false; +} + +static bool +nfs4_anylock_blockers(struct nfs4_client *clp) +{ + int i; + struct nfs4_stateowner *so; + struct nfs4_lockowner *lo; + + if (atomic_read(&clp->cl_delegs_in_recall)) + return true; + spin_lock(&clp->cl_lock); + for (i = 0; i < OWNER_HASH_SIZE; i++) { + list_for_each_entry(so, &clp->cl_ownerstr_hashtbl[i], + so_strhash) { + if (so->so_is_open_owner) + continue; + lo = lockowner(so); + if (nfs4_lockowner_has_blockers(lo)) { + spin_unlock(&clp->cl_lock); + return true; + } + } + } + spin_unlock(&clp->cl_lock); + return false; +} + +static void +nfs4_get_client_reaplist(struct nfsd_net *nn, struct list_head *reaplist, + struct laundry_time *lt) +{ + unsigned int maxreap, reapcnt = 0; + struct list_head *pos, *next; + struct nfs4_client *clp; + + maxreap = (atomic_read(&nn->nfs4_client_count) >= nn->nfs4_max_clients) ? + NFSD_CLIENT_MAX_TRIM_PER_RUN : 0; + INIT_LIST_HEAD(reaplist); + spin_lock(&nn->client_lock); + list_for_each_safe(pos, next, &nn->client_lru) { + clp = list_entry(pos, struct nfs4_client, cl_lru); + if (clp->cl_state == NFSD4_EXPIRABLE) + goto exp_client; + if (!state_expired(lt, clp->cl_time)) + break; + if (!atomic_read(&clp->cl_rpc_users)) { + if (clp->cl_state == NFSD4_ACTIVE) + atomic_inc(&nn->nfsd_courtesy_clients); + clp->cl_state = NFSD4_COURTESY; + } + if (!client_has_state(clp)) + goto exp_client; + if (!nfs4_anylock_blockers(clp)) + if (reapcnt >= maxreap) + continue; +exp_client: + if (!mark_client_expired_locked(clp)) { + list_add(&clp->cl_lru, reaplist); + reapcnt++; + } + } + spin_unlock(&nn->client_lock); +} + +static void +nfs4_get_courtesy_client_reaplist(struct nfsd_net *nn, + struct list_head *reaplist) +{ + unsigned int maxreap = 0, reapcnt = 0; + struct list_head *pos, *next; + struct nfs4_client *clp; + + maxreap = NFSD_CLIENT_MAX_TRIM_PER_RUN; + INIT_LIST_HEAD(reaplist); + + spin_lock(&nn->client_lock); + list_for_each_safe(pos, next, &nn->client_lru) { + clp = list_entry(pos, struct nfs4_client, cl_lru); + if (clp->cl_state == NFSD4_ACTIVE) + break; + if (reapcnt >= maxreap) + break; + if (!mark_client_expired_locked(clp)) { + list_add(&clp->cl_lru, reaplist); + reapcnt++; + } + } + spin_unlock(&nn->client_lock); +} + +static void +nfs4_process_client_reaplist(struct list_head *reaplist) +{ + struct list_head *pos, *next; + struct nfs4_client *clp; + + list_for_each_safe(pos, next, reaplist) { + clp = list_entry(pos, struct nfs4_client, cl_lru); + trace_nfsd_clid_purged(&clp->cl_clientid); + list_del_init(&clp->cl_lru); + expire_client(clp); + } +} + static time64_t nfs4_laundromat(struct nfsd_net *nn) { - struct nfs4_client *clp; struct nfs4_openowner *oo; struct nfs4_delegation *dp; struct nfs4_ol_stateid *stp; struct nfsd4_blocked_lock *nbl; struct list_head *pos, *next, reaplist; - time64_t cutoff = ktime_get_boottime_seconds() - nn->nfsd4_lease; - time64_t t, new_timeo = nn->nfsd4_lease; + struct laundry_time lt = { + .cutoff = ktime_get_boottime_seconds() - nn->nfsd4_lease, + .new_timeo = nn->nfsd4_lease + }; struct nfs4_cpntf_state *cps; copy_stateid_t *cps_t; int i; - dprintk("NFSD: laundromat service - starting\n"); - if (clients_still_reclaiming(nn)) { - new_timeo = 0; + lt.new_timeo = 0; goto out; } - dprintk("NFSD: end of grace period\n"); nfsd4_end_grace(nn); - INIT_LIST_HEAD(&reaplist); spin_lock(&nn->s2s_cp_lock); idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) { cps = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid); - if (cps->cp_stateid.sc_type == NFS4_COPYNOTIFY_STID && - cps->cpntf_time > cutoff) + if (cps->cp_stateid.cs_type == NFS4_COPYNOTIFY_STID && + state_expired(<, cps->cpntf_time)) _free_cpntf_state_locked(nn, cps); } spin_unlock(&nn->s2s_cp_lock); + nfs4_get_client_reaplist(nn, &reaplist, <); + nfs4_process_client_reaplist(&reaplist); - spin_lock(&nn->client_lock); - list_for_each_safe(pos, next, &nn->client_lru) { - clp = list_entry(pos, struct nfs4_client, cl_lru); - if (clp->cl_time > cutoff) { - t = clp->cl_time - cutoff; - new_timeo = min(new_timeo, t); - break; - } - if (mark_client_expired_locked(clp)) { - dprintk("NFSD: client in use (clientid %08x)\n", - clp->cl_clientid.cl_id); - continue; - } - list_add(&clp->cl_lru, &reaplist); - } - spin_unlock(&nn->client_lock); - list_for_each_safe(pos, next, &reaplist) { - clp = list_entry(pos, struct nfs4_client, cl_lru); - dprintk("NFSD: purging unused client (clientid %08x)\n", - clp->cl_clientid.cl_id); - list_del_init(&clp->cl_lru); - expire_client(clp); - } spin_lock(&state_lock); list_for_each_safe(pos, next, &nn->del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - if (dp->dl_time > cutoff) { - t = dp->dl_time - cutoff; - new_timeo = min(new_timeo, t); + if (!state_expired(<, dp->dl_time)) break; - } WARN_ON(!unhash_delegation_locked(dp)); list_add(&dp->dl_recall_lru, &reaplist); } @@ -5332,11 +6062,8 @@ nfs4_laundromat(struct nfsd_net *nn) while (!list_empty(&nn->close_lru)) { oo = list_first_entry(&nn->close_lru, struct nfs4_openowner, oo_close_lru); - if (oo->oo_time > cutoff) { - t = oo->oo_time - cutoff; - new_timeo = min(new_timeo, t); + if (!state_expired(<, oo->oo_time)) break; - } list_del_init(&oo->oo_close_lru); stp = oo->oo_last_closed_stid; oo->oo_last_closed_stid = NULL; @@ -5362,11 +6089,8 @@ nfs4_laundromat(struct nfsd_net *nn) while (!list_empty(&nn->blocked_locks_lru)) { nbl = list_first_entry(&nn->blocked_locks_lru, struct nfsd4_blocked_lock, nbl_lru); - if (nbl->nbl_time > cutoff) { - t = nbl->nbl_time - cutoff; - new_timeo = min(new_timeo, t); + if (!state_expired(<, nbl->nbl_time)) break; - } list_move(&nbl->nbl_lru, &reaplist); list_del_init(&nbl->nbl_list); } @@ -5378,12 +6102,14 @@ nfs4_laundromat(struct nfsd_net *nn) list_del_init(&nbl->nbl_lru); free_blocked_lock(nbl); } +#ifdef CONFIG_NFSD_V4_2_INTER_SSC + /* service the server-to-server copy delayed unmount list */ + nfsd4_ssc_expire_umount(nn); +#endif out: - new_timeo = max_t(time64_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); - return new_timeo; + return max_t(time64_t, lt.new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); } -static struct workqueue_struct *laundry_wq; static void laundromat_main(struct work_struct *); static void @@ -5395,30 +6121,26 @@ laundromat_main(struct work_struct *laundry) laundromat_work); t = nfs4_laundromat(nn); - dprintk("NFSD: laundromat_main - sleeping for %lld seconds\n", t); queue_delayed_work(laundry_wq, &nn->laundromat_work, t*HZ); } -static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp) +static void +courtesy_client_reaper(struct work_struct *reaper) { - if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle)) - return nfserr_bad_stateid; - return nfs_ok; -} + struct list_head reaplist; + struct delayed_work *dwork = to_delayed_work(reaper); + struct nfsd_net *nn = container_of(dwork, struct nfsd_net, + nfsd_shrinker_work); -static inline int -access_permit_read(struct nfs4_ol_stateid *stp) -{ - return test_access(NFS4_SHARE_ACCESS_READ, stp) || - test_access(NFS4_SHARE_ACCESS_BOTH, stp) || - test_access(NFS4_SHARE_ACCESS_WRITE, stp); + nfs4_get_courtesy_client_reaplist(nn, &reaplist); + nfs4_process_client_reaplist(&reaplist); } -static inline int -access_permit_write(struct nfs4_ol_stateid *stp) +static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp) { - return test_access(NFS4_SHARE_ACCESS_WRITE, stp) || - test_access(NFS4_SHARE_ACCESS_BOTH, stp); + if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle)) + return nfserr_bad_stateid; + return nfs_ok; } static @@ -5455,16 +6177,6 @@ check_special_stateids(struct net *net, svc_fh *current_fh, stateid_t *stateid, NFS4_SHARE_DENY_READ); } -/* - * Allow READ/WRITE during grace period on recovered state only for files - * that are not able to provide mandatory locking. - */ -static inline int -grace_disallows_io(struct net *net, struct inode *inode) -{ - return opens_in_grace(net) && mandatory_lock(inode); -} - static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_session) { /* @@ -5521,15 +6233,8 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) || CLOSE_STATEID(stateid)) return status; - /* Client debugging aid. */ - if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid)) { - char addr_str[INET6_ADDRSTRLEN]; - rpc_ntop((struct sockaddr *)&cl->cl_addr, addr_str, - sizeof(addr_str)); - pr_warn_ratelimited("NFSD: client %s testing state ID " - "with incorrect client ID\n", addr_str); + if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid)) return status; - } spin_lock(&cl->cl_lock); s = find_stateid_locked(cl, stateid); if (!s) @@ -5550,7 +6255,7 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) break; default: printk("unknown stateid type %x\n", s->sc_type); - /* Fallthrough */ + fallthrough; case NFS4_CLOSED_STID: case NFS4_CLOSED_DELEG_STID: status = nfserr_bad_stateid; @@ -5566,6 +6271,7 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, struct nfs4_stid **s, struct nfsd_net *nn) { __be32 status; + struct nfs4_stid *stid; bool return_revoked = false; /* @@ -5580,8 +6286,7 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) || CLOSE_STATEID(stateid)) return nfserr_bad_stateid; - status = lookup_clientid(&stateid->si_opaque.so_clid, cstate, nn, - false); + status = set_client(&stateid->si_opaque.so_clid, cstate, nn); if (status == nfserr_stale_clientid) { if (cstate->session) return nfserr_bad_stateid; @@ -5589,15 +6294,16 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, } if (status) return status; - *s = find_stateid_by_type(cstate->clp, stateid, typemask); - if (!*s) + stid = find_stateid_by_type(cstate->clp, stateid, typemask); + if (!stid) return nfserr_bad_stateid; - if (((*s)->sc_type == NFS4_REVOKED_DELEG_STID) && !return_revoked) { - nfs4_put_stid(*s); + if ((stid->sc_type == NFS4_REVOKED_DELEG_STID) && !return_revoked) { + nfs4_put_stid(stid); if (cstate->minorversion) return nfserr_deleg_revoked; return nfserr_bad_stateid; } + *s = stid; return nfs_ok; } @@ -5618,7 +6324,6 @@ nfs4_find_file(struct nfs4_stid *s, int flags) return find_readable_file(s->sc_file); else return find_writeable_file(s->sc_file); - break; } return NULL; @@ -5663,12 +6368,12 @@ out: static void _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps) { - WARN_ON_ONCE(cps->cp_stateid.sc_type != NFS4_COPYNOTIFY_STID); - if (!refcount_dec_and_test(&cps->cp_stateid.sc_count)) + WARN_ON_ONCE(cps->cp_stateid.cs_type != NFS4_COPYNOTIFY_STID); + if (!refcount_dec_and_test(&cps->cp_stateid.cs_count)) return; list_del(&cps->cp_list); idr_remove(&nn->s2s_cp_stateids, - cps->cp_stateid.stid.si_opaque.so_id); + cps->cp_stateid.cs_stid.si_opaque.so_id); kfree(cps); } /* @@ -5690,12 +6395,12 @@ __be32 manage_cpntf_state(struct nfsd_net *nn, stateid_t *st, if (cps_t) { state = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid); - if (state->cp_stateid.sc_type != NFS4_COPYNOTIFY_STID) { + if (state->cp_stateid.cs_type != NFS4_COPYNOTIFY_STID) { state = NULL; goto unlock; } if (!clp) - refcount_inc(&state->cp_stateid.sc_count); + refcount_inc(&state->cp_stateid.cs_count); else _free_cpntf_state_locked(nn, state); } @@ -5713,21 +6418,27 @@ static __be32 find_cpntf_state(struct nfsd_net *nn, stateid_t *st, { __be32 status; struct nfs4_cpntf_state *cps = NULL; - struct nfsd4_compound_state cstate; + struct nfs4_client *found; status = manage_cpntf_state(nn, st, NULL, &cps); if (status) return status; cps->cpntf_time = ktime_get_boottime_seconds(); - memset(&cstate, 0, sizeof(cstate)); - status = lookup_clientid(&cps->cp_p_clid, &cstate, nn, true); - if (status) + + status = nfserr_expired; + found = lookup_clientid(&cps->cp_p_clid, true, nn); + if (!found) goto out; - status = nfsd4_lookup_stateid(&cstate, &cps->cp_p_stateid, - NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, - stid, nn); - put_client_renew(cstate.clp); + + *stid = find_stateid_by_type(found, &cps->cp_p_stateid, + NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID); + if (*stid) + status = nfs_ok; + else + status = nfserr_bad_stateid; + + put_client_renew(found); out: nfs4_put_cpntf_state(nn, cps); return status; @@ -5749,7 +6460,6 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, stateid_t *stateid, int flags, struct nfsd_file **nfp, struct nfs4_stid **cstid) { - struct inode *ino = d_inode(fhp->fh_dentry); struct net *net = SVC_NET(rqstp); struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct nfs4_stid *s = NULL; @@ -5758,11 +6468,12 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, if (nfp) *nfp = NULL; - if (grace_disallows_io(net, ino)) - return nfserr_grace; - if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) { - status = check_special_stateids(net, fhp, stateid, flags); + if (cstid) + status = nfserr_bad_stateid; + else + status = check_special_stateids(net, fhp, stateid, + flags); goto done; } @@ -5816,7 +6527,7 @@ nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { struct nfsd4_test_stateid *test_stateid = &u->test_stateid; struct nfsd4_test_stateid_id *stateid; - struct nfs4_client *cl = cstate->session->se_client; + struct nfs4_client *cl = cstate->clp; list_for_each_entry(stateid, &test_stateid->ts_stateid_list, ts_id_list) stateid->ts_id_status = @@ -5862,7 +6573,7 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stateid_t *stateid = &free_stateid->fr_stateid; struct nfs4_stid *s; struct nfs4_delegation *dp; - struct nfs4_client *cl = cstate->session->se_client; + struct nfs4_client *cl = cstate->clp; __be32 ret = nfserr_bad_stateid; spin_lock(&cl->cl_lock); @@ -5943,8 +6654,7 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, struct nfs4_stid *s; struct nfs4_ol_stateid *stp = NULL; - dprintk("NFSD: %s: seqid=%d stateid = " STATEID_FMT "\n", __func__, - seqid, STATEID_VAL(stateid)); + trace_nfsd_preprocess(seqid, stateid); *stpp = NULL; status = nfsd4_lookup_stateid(cstate, stateid, typemask, &s, nn); @@ -6013,9 +6723,7 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, oo->oo_flags |= NFS4_OO_CONFIRMED; nfs4_inc_and_copy_stateid(&oc->oc_resp_stateid, &stp->st_stid); mutex_unlock(&stp->st_mutex); - dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n", - __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid)); - + trace_nfsd_open_confirm(oc->oc_seqid, &stp->st_stid.sc_stateid); nfsd4_client_record_create(oo->oo_owner.so_client); status = nfs_ok; put_stateid: @@ -6100,6 +6808,7 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) struct nfs4_client *clp = s->st_stid.sc_client; bool unhashed; LIST_HEAD(reaplist); + struct nfs4_ol_stateid *stp; spin_lock(&clp->cl_lock); unhashed = unhash_open_stateid(s, &reaplist); @@ -6108,6 +6817,8 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) if (unhashed) put_ol_stateid_locked(s, &reaplist); spin_unlock(&clp->cl_lock); + list_for_each_entry(stp, &reaplist, st_locks) + nfs4_free_cpntf_statelist(clp->net, &stp->st_stid); free_ol_stateid_reaplist(&reaplist); } else { spin_unlock(&clp->cl_lock); @@ -6191,6 +6902,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto put_stateid; + wake_up_var(d_inode(cstate->current_fh.fh_dentry)); destroy_delegation(dp); put_stateid: nfs4_put_stid(&dp->dl_stid); @@ -6198,15 +6910,6 @@ out: return status; } -static inline u64 -end_offset(u64 start, u64 len) -{ - u64 end; - - end = start + len; - return end >= start ? end: NFS4_MAX_UINT64; -} - /* last octet in a range */ static inline u64 last_byte_offset(u64 start, u64 len) @@ -6236,7 +6939,7 @@ nfs4_transform_lock_offset(struct file_lock *lock) } static fl_owner_t -nfsd4_fl_get_owner(fl_owner_t owner) +nfsd4_lm_get_owner(fl_owner_t owner) { struct nfs4_lockowner *lo = (struct nfs4_lockowner *)owner; @@ -6245,7 +6948,7 @@ nfsd4_fl_get_owner(fl_owner_t owner) } static void -nfsd4_fl_put_owner(fl_owner_t owner) +nfsd4_lm_put_owner(fl_owner_t owner) { struct nfs4_lockowner *lo = (struct nfs4_lockowner *)owner; @@ -6253,6 +6956,29 @@ nfsd4_fl_put_owner(fl_owner_t owner) nfs4_put_stateowner(&lo->lo_owner); } +/* return pointer to struct nfs4_client if client is expirable */ +static bool +nfsd4_lm_lock_expirable(struct file_lock *cfl) +{ + struct nfs4_lockowner *lo = (struct nfs4_lockowner *)cfl->fl_owner; + struct nfs4_client *clp = lo->lo_owner.so_client; + struct nfsd_net *nn; + + if (try_to_expire_client(clp)) { + nn = net_generic(clp->net, nfsd_net_id); + mod_delayed_work(laundry_wq, &nn->laundromat_work, 0); + return true; + } + return false; +} + +/* schedule laundromat to run immediately and wait for it to complete */ +static void +nfsd4_lm_expire_lock(void) +{ + flush_workqueue(laundry_wq); +} + static void nfsd4_lm_notify(struct file_lock *fl) { @@ -6272,14 +6998,19 @@ nfsd4_lm_notify(struct file_lock *fl) } spin_unlock(&nn->blocked_locks_lock); - if (queue) + if (queue) { + trace_nfsd_cb_notify_lock(lo, nbl); nfsd4_run_cb(&nbl->nbl_cb); + } } static const struct lock_manager_operations nfsd_posix_mng_ops = { + .lm_mod_owner = THIS_MODULE, .lm_notify = nfsd4_lm_notify, - .lm_get_owner = nfsd4_fl_get_owner, - .lm_put_owner = nfsd4_fl_put_owner, + .lm_get_owner = nfsd4_lm_get_owner, + .lm_put_owner = nfsd4_lm_put_owner, + .lm_lock_expirable = nfsd4_lm_lock_expirable, + .lm_expire_lock = nfsd4_lm_expire_lock, }; static inline void @@ -6393,21 +7124,21 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, } static struct nfs4_ol_stateid * -find_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp) +find_lock_stateid(const struct nfs4_lockowner *lo, + const struct nfs4_ol_stateid *ost) { struct nfs4_ol_stateid *lst; - struct nfs4_client *clp = lo->lo_owner.so_client; - lockdep_assert_held(&clp->cl_lock); + lockdep_assert_held(&ost->st_stid.sc_client->cl_lock); - list_for_each_entry(lst, &lo->lo_owner.so_stateids, st_perstateowner) { - if (lst->st_stid.sc_type != NFS4_LOCK_STID) - continue; - if (lst->st_stid.sc_file == fp) { - refcount_inc(&lst->st_stid.sc_count); - return lst; + /* If ost is not hashed, ost->st_locks will not be valid */ + if (!nfs4_ol_stateid_unhashed(ost)) + list_for_each_entry(lst, &ost->st_locks, st_locks) { + if (lst->st_stateowner == &lo->lo_owner) { + refcount_inc(&lst->st_stid.sc_count); + return lst; + } } - } return NULL; } @@ -6423,11 +7154,11 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, mutex_lock_nested(&stp->st_mutex, OPEN_STATEID_MUTEX); retry: spin_lock(&clp->cl_lock); - spin_lock(&fp->fi_lock); - retstp = find_lock_stateid(lo, fp); + if (nfs4_ol_stateid_unhashed(open_stp)) + goto out_close; + retstp = find_lock_stateid(lo, open_stp); if (retstp) - goto out_unlock; - + goto out_found; refcount_inc(&stp->st_stid.sc_count); stp->st_stid.sc_type = NFS4_LOCK_STID; stp->st_stateowner = nfs4_get_stateowner(&lo->lo_owner); @@ -6436,22 +7167,26 @@ retry: stp->st_access_bmap = 0; stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; + spin_lock(&fp->fi_lock); list_add(&stp->st_locks, &open_stp->st_locks); list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); list_add(&stp->st_perfile, &fp->fi_stateids); -out_unlock: spin_unlock(&fp->fi_lock); spin_unlock(&clp->cl_lock); - if (retstp) { - if (nfsd4_lock_ol_stateid(retstp) != nfs_ok) { - nfs4_put_stid(&retstp->st_stid); - goto retry; - } - /* To keep mutex tracking happy */ - mutex_unlock(&stp->st_mutex); - stp = retstp; - } return stp; +out_found: + spin_unlock(&clp->cl_lock); + if (nfsd4_lock_ol_stateid(retstp) != nfs_ok) { + nfs4_put_stid(&retstp->st_stid); + goto retry; + } + /* To keep mutex tracking happy */ + mutex_unlock(&stp->st_mutex); + return retstp; +out_close: + spin_unlock(&clp->cl_lock); + mutex_unlock(&stp->st_mutex); + return NULL; } static struct nfs4_ol_stateid * @@ -6466,7 +7201,7 @@ find_or_create_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fi, *new = false; spin_lock(&clp->cl_lock); - lst = find_lock_stateid(lo, fi); + lst = find_lock_stateid(lo, ost); spin_unlock(&clp->cl_lock); if (lst != NULL) { if (nfsd4_lock_ol_stateid(lst) == nfs_ok) @@ -6590,13 +7325,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (nfsd4_has_session(cstate)) /* See rfc 5661 18.10.3: given clientid is ignored: */ memcpy(&lock->lk_new_clientid, - &cstate->session->se_client->cl_clientid, + &cstate->clp->cl_clientid, sizeof(clientid_t)); - status = nfserr_stale_clientid; - if (STALE_CLIENTID(&lock->lk_new_clientid, nn)) - goto out; - /* validate and update open stateid and open seqid */ status = nfs4_preprocess_confirmed_seqid_op(cstate, lock->lk_new_open_seqid, @@ -6634,12 +7365,15 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (!locks_in_grace(net) && lock->lk_reclaim) goto out; + if (lock->lk_reclaim) + fl_flags |= FL_RECLAIM; + fp = lock_stp->st_stid.sc_file; switch (lock->lk_type) { case NFS4_READW_LT: if (nfsd4_has_session(cstate)) fl_flags |= FL_SLEEP; - /* Fallthrough */ + fallthrough; case NFS4_READ_LT: spin_lock(&fp->fi_lock); nf = find_readable_file_locked(fp); @@ -6651,7 +7385,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, case NFS4_WRITEW_LT: if (nfsd4_has_session(cstate)) fl_flags |= FL_SLEEP; - /* Fallthrough */ + fallthrough; case NFS4_WRITE_LT: spin_lock(&fp->fi_lock); nf = find_writeable_file_locked(fp); @@ -6670,6 +7404,16 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; } + /* + * Most filesystems with their own ->lock operations will block + * the nfsd thread waiting to acquire the lock. That leads to + * deadlocks (we don't want every nfsd thread tied up waiting + * for file locks), so don't attempt blocking lock notifications + * on those filesystems: + */ + if (nf->nf_file->f_op->lock) + fl_flags &= ~FL_SLEEP; + nbl = find_or_allocate_block(lock_sop, &fp->fi_fhandle, nn); if (!nbl) { dprintk("NFSD: %s: unable to allocate block!\n", __func__); @@ -6700,6 +7444,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, spin_lock(&nn->blocked_locks_lock); list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked); list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru); + kref_get(&nbl->nbl_kref); spin_unlock(&nn->blocked_locks_lock); } @@ -6712,8 +7457,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nn->somebody_reclaimed = true; break; case FILE_LOCK_DEFERRED: + kref_put(&nbl->nbl_kref, free_nbl); nbl = NULL; - /* Fallthrough */ + fallthrough; case -EAGAIN: /* conflock holds conflicting lock */ status = nfserr_denied; dprintk("NFSD: nfsd4_lock: conflicting lock found!\n"); @@ -6732,8 +7478,13 @@ out: /* dequeue it if we queued it before */ if (fl_flags & FL_SLEEP) { spin_lock(&nn->blocked_locks_lock); - list_del_init(&nbl->nbl_list); - list_del_init(&nbl->nbl_lru); + if (!list_empty(&nbl->nbl_list) && + !list_empty(&nbl->nbl_lru)) { + list_del_init(&nbl->nbl_list); + list_del_init(&nbl->nbl_lru); + kref_put(&nbl->nbl_kref, free_nbl); + } + /* nbl can use one of lists to be linked to reaplist */ spin_unlock(&nn->blocked_locks_lock); } free_blocked_lock(nbl); @@ -6769,17 +7520,28 @@ out: /* * The NFSv4 spec allows a client to do a LOCKT without holding an OPEN, * so we do a temporary open here just to get an open file to pass to - * vfs_test_lock. (Arguably perhaps test_lock should be done with an - * inode operation.) + * vfs_test_lock. */ static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock) { struct nfsd_file *nf; - __be32 err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf); - if (!err) { - err = nfserrno(vfs_test_lock(nf->nf_file, lock)); - nfsd_file_put(nf); - } + struct inode *inode; + __be32 err; + + err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf); + if (err) + return err; + inode = fhp->fh_dentry->d_inode; + inode_lock(inode); /* to block new leases till after test_lock: */ + err = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); + if (err) + goto out; + lock->fl_file = nf->nf_file; + err = nfserrno(vfs_test_lock(nf->nf_file, lock)); + lock->fl_file = NULL; +out: + inode_unlock(inode); + nfsd_file_put(nf); return err; } @@ -6803,8 +7565,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return nfserr_inval; if (!nfsd4_has_session(cstate)) { - status = lookup_clientid(&lockt->lt_clientid, cstate, nn, - false); + status = set_client(&lockt->lt_clientid, cstate, nn); if (status) goto out; } @@ -6968,57 +7729,56 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) return status; } +/** + * nfsd4_release_lockowner - process NFSv4.0 RELEASE_LOCKOWNER operations + * @rqstp: RPC transaction + * @cstate: NFSv4 COMPOUND state + * @u: RELEASE_LOCKOWNER arguments + * + * The lockowner's so_count is bumped when a lock record is added + * or when copying a conflicting lock. The latter case is brief, + * but can lead to fleeting false positives when looking for + * locks-in-use. + * + * Return values: + * %nfs_ok: lockowner released or not found + * %nfserr_locks_held: lockowner still in use + * %nfserr_stale_clientid: clientid no longer active + * %nfserr_expired: clientid not recognized + */ __be32 nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_release_lockowner *rlockowner = &u->release_lockowner; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); clientid_t *clid = &rlockowner->rl_clientid; - struct nfs4_stateowner *sop; - struct nfs4_lockowner *lo = NULL; struct nfs4_ol_stateid *stp; - struct xdr_netobj *owner = &rlockowner->rl_owner; - unsigned int hashval = ownerstr_hashval(owner); - __be32 status; - struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + struct nfs4_lockowner *lo; struct nfs4_client *clp; - LIST_HEAD (reaplist); + LIST_HEAD(reaplist); + __be32 status; dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", clid->cl_boot, clid->cl_id); - status = lookup_clientid(clid, cstate, nn, false); + status = set_client(clid, cstate, nn); if (status) return status; - clp = cstate->clp; - /* Find the matching lock stateowner */ - spin_lock(&clp->cl_lock); - list_for_each_entry(sop, &clp->cl_ownerstr_hashtbl[hashval], - so_strhash) { - if (sop->so_is_open_owner || !same_owner_str(sop, owner)) - continue; - - /* see if there are still any locks associated with it */ - lo = lockowner(sop); - list_for_each_entry(stp, &sop->so_stateids, st_perstateowner) { - if (check_for_locks(stp->st_stid.sc_file, lo)) { - status = nfserr_locks_held; - spin_unlock(&clp->cl_lock); - return status; - } - } - - nfs4_get_stateowner(sop); - break; - } + spin_lock(&clp->cl_lock); + lo = find_lockowner_str_locked(clp, &rlockowner->rl_owner); if (!lo) { spin_unlock(&clp->cl_lock); - return status; + return nfs_ok; + } + if (atomic_read(&lo->lo_owner.so_count) != 2) { + spin_unlock(&clp->cl_lock); + nfs4_put_stateowner(&lo->lo_owner); + return nfserr_locks_held; } - unhash_lockowner_locked(lo); while (!list_empty(&lo->lo_owner.so_stateids)) { stp = list_first_entry(&lo->lo_owner.so_stateids, @@ -7028,11 +7788,11 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, put_ol_stateid_locked(stp, &reaplist); } spin_unlock(&clp->cl_lock); + free_ol_stateid_reaplist(&reaplist); remove_blocked_locks(lo); nfs4_put_stateowner(&lo->lo_owner); - - return status; + return nfs_ok; } static inline struct nfs4_client_reclaim * @@ -7063,7 +7823,6 @@ nfs4_client_to_reclaim(struct xdr_netobj name, struct xdr_netobj princhash, unsigned int strhashval; struct nfs4_client_reclaim *crp; - dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", name.len, name.data); crp = alloc_reclaim(); if (crp) { strhashval = clientstr_hashval(name); @@ -7113,8 +7872,6 @@ nfsd4_find_reclaim_client(struct xdr_netobj name, struct nfsd_net *nn) unsigned int strhashval; struct nfs4_client_reclaim *crp = NULL; - dprintk("NFSD: nfs4_find_reclaim_client for name %.*s\n", name.len, name.data); - strhashval = clientstr_hashval(name); list_for_each_entry(crp, &nn->reclaim_str_hashtbl[strhashval], cr_strhash) { if (compare_blob(&crp->cr_name, &name) == 0) { @@ -7124,620 +7881,18 @@ nfsd4_find_reclaim_client(struct xdr_netobj name, struct nfsd_net *nn) return NULL; } -/* -* Called from OPEN. Look for clientid in reclaim list. -*/ __be32 -nfs4_check_open_reclaim(clientid_t *clid, - struct nfsd4_compound_state *cstate, - struct nfsd_net *nn) +nfs4_check_open_reclaim(struct nfs4_client *clp) { - __be32 status; - - /* find clientid in conf_id_hashtbl */ - status = lookup_clientid(clid, cstate, nn, false); - if (status) - return nfserr_reclaim_bad; - - if (test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &cstate->clp->cl_flags)) + if (test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &clp->cl_flags)) return nfserr_no_grace; - if (nfsd4_client_record_check(cstate->clp)) + if (nfsd4_client_record_check(clp)) return nfserr_reclaim_bad; return nfs_ok; } -#ifdef CONFIG_NFSD_FAULT_INJECTION -static inline void -put_client(struct nfs4_client *clp) -{ - atomic_dec(&clp->cl_rpc_users); -} - -static struct nfs4_client * -nfsd_find_client(struct sockaddr_storage *addr, size_t addr_size) -{ - struct nfs4_client *clp; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - - if (!nfsd_netns_ready(nn)) - return NULL; - - list_for_each_entry(clp, &nn->client_lru, cl_lru) { - if (memcmp(&clp->cl_addr, addr, addr_size) == 0) - return clp; - } - return NULL; -} - -u64 -nfsd_inject_print_clients(void) -{ - struct nfs4_client *clp; - u64 count = 0; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - char buf[INET6_ADDRSTRLEN]; - - if (!nfsd_netns_ready(nn)) - return 0; - - spin_lock(&nn->client_lock); - list_for_each_entry(clp, &nn->client_lru, cl_lru) { - rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf)); - pr_info("NFS Client: %s\n", buf); - ++count; - } - spin_unlock(&nn->client_lock); - - return count; -} - -u64 -nfsd_inject_forget_client(struct sockaddr_storage *addr, size_t addr_size) -{ - u64 count = 0; - struct nfs4_client *clp; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - - if (!nfsd_netns_ready(nn)) - return count; - - spin_lock(&nn->client_lock); - clp = nfsd_find_client(addr, addr_size); - if (clp) { - if (mark_client_expired_locked(clp) == nfs_ok) - ++count; - else - clp = NULL; - } - spin_unlock(&nn->client_lock); - - if (clp) - expire_client(clp); - - return count; -} - -u64 -nfsd_inject_forget_clients(u64 max) -{ - u64 count = 0; - struct nfs4_client *clp, *next; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - LIST_HEAD(reaplist); - - if (!nfsd_netns_ready(nn)) - return count; - - spin_lock(&nn->client_lock); - list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) { - if (mark_client_expired_locked(clp) == nfs_ok) { - list_add(&clp->cl_lru, &reaplist); - if (max != 0 && ++count >= max) - break; - } - } - spin_unlock(&nn->client_lock); - - list_for_each_entry_safe(clp, next, &reaplist, cl_lru) - expire_client(clp); - - return count; -} - -static void nfsd_print_count(struct nfs4_client *clp, unsigned int count, - const char *type) -{ - char buf[INET6_ADDRSTRLEN]; - rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf)); - printk(KERN_INFO "NFS Client: %s has %u %s\n", buf, count, type); -} - -static void -nfsd_inject_add_lock_to_list(struct nfs4_ol_stateid *lst, - struct list_head *collect) -{ - struct nfs4_client *clp = lst->st_stid.sc_client; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - - if (!collect) - return; - - lockdep_assert_held(&nn->client_lock); - atomic_inc(&clp->cl_rpc_users); - list_add(&lst->st_locks, collect); -} - -static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max, - struct list_head *collect, - bool (*func)(struct nfs4_ol_stateid *)) -{ - struct nfs4_openowner *oop; - struct nfs4_ol_stateid *stp, *st_next; - struct nfs4_ol_stateid *lst, *lst_next; - u64 count = 0; - - spin_lock(&clp->cl_lock); - list_for_each_entry(oop, &clp->cl_openowners, oo_perclient) { - list_for_each_entry_safe(stp, st_next, - &oop->oo_owner.so_stateids, st_perstateowner) { - list_for_each_entry_safe(lst, lst_next, - &stp->st_locks, st_locks) { - if (func) { - if (func(lst)) - nfsd_inject_add_lock_to_list(lst, - collect); - } - ++count; - /* - * Despite the fact that these functions deal - * with 64-bit integers for "count", we must - * ensure that it doesn't blow up the - * clp->cl_rpc_users. Throw a warning if we - * start to approach INT_MAX here. - */ - WARN_ON_ONCE(count == (INT_MAX / 2)); - if (count == max) - goto out; - } - } - } -out: - spin_unlock(&clp->cl_lock); - - return count; -} - -static u64 -nfsd_collect_client_locks(struct nfs4_client *clp, struct list_head *collect, - u64 max) -{ - return nfsd_foreach_client_lock(clp, max, collect, unhash_lock_stateid); -} - -static u64 -nfsd_print_client_locks(struct nfs4_client *clp) -{ - u64 count = nfsd_foreach_client_lock(clp, 0, NULL, NULL); - nfsd_print_count(clp, count, "locked files"); - return count; -} - -u64 -nfsd_inject_print_locks(void) -{ - struct nfs4_client *clp; - u64 count = 0; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - - if (!nfsd_netns_ready(nn)) - return 0; - - spin_lock(&nn->client_lock); - list_for_each_entry(clp, &nn->client_lru, cl_lru) - count += nfsd_print_client_locks(clp); - spin_unlock(&nn->client_lock); - - return count; -} - -static void -nfsd_reap_locks(struct list_head *reaplist) -{ - struct nfs4_client *clp; - struct nfs4_ol_stateid *stp, *next; - - list_for_each_entry_safe(stp, next, reaplist, st_locks) { - list_del_init(&stp->st_locks); - clp = stp->st_stid.sc_client; - nfs4_put_stid(&stp->st_stid); - put_client(clp); - } -} - -u64 -nfsd_inject_forget_client_locks(struct sockaddr_storage *addr, size_t addr_size) -{ - unsigned int count = 0; - struct nfs4_client *clp; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - LIST_HEAD(reaplist); - - if (!nfsd_netns_ready(nn)) - return count; - - spin_lock(&nn->client_lock); - clp = nfsd_find_client(addr, addr_size); - if (clp) - count = nfsd_collect_client_locks(clp, &reaplist, 0); - spin_unlock(&nn->client_lock); - nfsd_reap_locks(&reaplist); - return count; -} - -u64 -nfsd_inject_forget_locks(u64 max) -{ - u64 count = 0; - struct nfs4_client *clp; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - LIST_HEAD(reaplist); - - if (!nfsd_netns_ready(nn)) - return count; - - spin_lock(&nn->client_lock); - list_for_each_entry(clp, &nn->client_lru, cl_lru) { - count += nfsd_collect_client_locks(clp, &reaplist, max - count); - if (max != 0 && count >= max) - break; - } - spin_unlock(&nn->client_lock); - nfsd_reap_locks(&reaplist); - return count; -} - -static u64 -nfsd_foreach_client_openowner(struct nfs4_client *clp, u64 max, - struct list_head *collect, - void (*func)(struct nfs4_openowner *)) -{ - struct nfs4_openowner *oop, *next; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - u64 count = 0; - - lockdep_assert_held(&nn->client_lock); - - spin_lock(&clp->cl_lock); - list_for_each_entry_safe(oop, next, &clp->cl_openowners, oo_perclient) { - if (func) { - func(oop); - if (collect) { - atomic_inc(&clp->cl_rpc_users); - list_add(&oop->oo_perclient, collect); - } - } - ++count; - /* - * Despite the fact that these functions deal with - * 64-bit integers for "count", we must ensure that - * it doesn't blow up the clp->cl_rpc_users. Throw a - * warning if we start to approach INT_MAX here. - */ - WARN_ON_ONCE(count == (INT_MAX / 2)); - if (count == max) - break; - } - spin_unlock(&clp->cl_lock); - - return count; -} - -static u64 -nfsd_print_client_openowners(struct nfs4_client *clp) -{ - u64 count = nfsd_foreach_client_openowner(clp, 0, NULL, NULL); - - nfsd_print_count(clp, count, "openowners"); - return count; -} - -static u64 -nfsd_collect_client_openowners(struct nfs4_client *clp, - struct list_head *collect, u64 max) -{ - return nfsd_foreach_client_openowner(clp, max, collect, - unhash_openowner_locked); -} - -u64 -nfsd_inject_print_openowners(void) -{ - struct nfs4_client *clp; - u64 count = 0; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - - if (!nfsd_netns_ready(nn)) - return 0; - - spin_lock(&nn->client_lock); - list_for_each_entry(clp, &nn->client_lru, cl_lru) - count += nfsd_print_client_openowners(clp); - spin_unlock(&nn->client_lock); - - return count; -} - -static void -nfsd_reap_openowners(struct list_head *reaplist) -{ - struct nfs4_client *clp; - struct nfs4_openowner *oop, *next; - - list_for_each_entry_safe(oop, next, reaplist, oo_perclient) { - list_del_init(&oop->oo_perclient); - clp = oop->oo_owner.so_client; - release_openowner(oop); - put_client(clp); - } -} - -u64 -nfsd_inject_forget_client_openowners(struct sockaddr_storage *addr, - size_t addr_size) -{ - unsigned int count = 0; - struct nfs4_client *clp; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - LIST_HEAD(reaplist); - - if (!nfsd_netns_ready(nn)) - return count; - - spin_lock(&nn->client_lock); - clp = nfsd_find_client(addr, addr_size); - if (clp) - count = nfsd_collect_client_openowners(clp, &reaplist, 0); - spin_unlock(&nn->client_lock); - nfsd_reap_openowners(&reaplist); - return count; -} - -u64 -nfsd_inject_forget_openowners(u64 max) -{ - u64 count = 0; - struct nfs4_client *clp; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - LIST_HEAD(reaplist); - - if (!nfsd_netns_ready(nn)) - return count; - - spin_lock(&nn->client_lock); - list_for_each_entry(clp, &nn->client_lru, cl_lru) { - count += nfsd_collect_client_openowners(clp, &reaplist, - max - count); - if (max != 0 && count >= max) - break; - } - spin_unlock(&nn->client_lock); - nfsd_reap_openowners(&reaplist); - return count; -} - -static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max, - struct list_head *victims) -{ - struct nfs4_delegation *dp, *next; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - u64 count = 0; - - lockdep_assert_held(&nn->client_lock); - - spin_lock(&state_lock); - list_for_each_entry_safe(dp, next, &clp->cl_delegations, dl_perclnt) { - if (victims) { - /* - * It's not safe to mess with delegations that have a - * non-zero dl_time. They might have already been broken - * and could be processed by the laundromat outside of - * the state_lock. Just leave them be. - */ - if (dp->dl_time != 0) - continue; - - atomic_inc(&clp->cl_rpc_users); - WARN_ON(!unhash_delegation_locked(dp)); - list_add(&dp->dl_recall_lru, victims); - } - ++count; - /* - * Despite the fact that these functions deal with - * 64-bit integers for "count", we must ensure that - * it doesn't blow up the clp->cl_rpc_users. Throw a - * warning if we start to approach INT_MAX here. - */ - WARN_ON_ONCE(count == (INT_MAX / 2)); - if (count == max) - break; - } - spin_unlock(&state_lock); - return count; -} - -static u64 -nfsd_print_client_delegations(struct nfs4_client *clp) -{ - u64 count = nfsd_find_all_delegations(clp, 0, NULL); - - nfsd_print_count(clp, count, "delegations"); - return count; -} - -u64 -nfsd_inject_print_delegations(void) -{ - struct nfs4_client *clp; - u64 count = 0; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - - if (!nfsd_netns_ready(nn)) - return 0; - - spin_lock(&nn->client_lock); - list_for_each_entry(clp, &nn->client_lru, cl_lru) - count += nfsd_print_client_delegations(clp); - spin_unlock(&nn->client_lock); - - return count; -} - -static void -nfsd_forget_delegations(struct list_head *reaplist) -{ - struct nfs4_client *clp; - struct nfs4_delegation *dp, *next; - - list_for_each_entry_safe(dp, next, reaplist, dl_recall_lru) { - list_del_init(&dp->dl_recall_lru); - clp = dp->dl_stid.sc_client; - revoke_delegation(dp); - put_client(clp); - } -} - -u64 -nfsd_inject_forget_client_delegations(struct sockaddr_storage *addr, - size_t addr_size) -{ - u64 count = 0; - struct nfs4_client *clp; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - LIST_HEAD(reaplist); - - if (!nfsd_netns_ready(nn)) - return count; - - spin_lock(&nn->client_lock); - clp = nfsd_find_client(addr, addr_size); - if (clp) - count = nfsd_find_all_delegations(clp, 0, &reaplist); - spin_unlock(&nn->client_lock); - - nfsd_forget_delegations(&reaplist); - return count; -} - -u64 -nfsd_inject_forget_delegations(u64 max) -{ - u64 count = 0; - struct nfs4_client *clp; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - LIST_HEAD(reaplist); - - if (!nfsd_netns_ready(nn)) - return count; - - spin_lock(&nn->client_lock); - list_for_each_entry(clp, &nn->client_lru, cl_lru) { - count += nfsd_find_all_delegations(clp, max - count, &reaplist); - if (max != 0 && count >= max) - break; - } - spin_unlock(&nn->client_lock); - nfsd_forget_delegations(&reaplist); - return count; -} - -static void -nfsd_recall_delegations(struct list_head *reaplist) -{ - struct nfs4_client *clp; - struct nfs4_delegation *dp, *next; - - list_for_each_entry_safe(dp, next, reaplist, dl_recall_lru) { - list_del_init(&dp->dl_recall_lru); - clp = dp->dl_stid.sc_client; - /* - * We skipped all entries that had a zero dl_time before, - * so we can now reset the dl_time back to 0. If a delegation - * break comes in now, then it won't make any difference since - * we're recalling it either way. - */ - spin_lock(&state_lock); - dp->dl_time = 0; - spin_unlock(&state_lock); - nfsd_break_one_deleg(dp); - put_client(clp); - } -} - -u64 -nfsd_inject_recall_client_delegations(struct sockaddr_storage *addr, - size_t addr_size) -{ - u64 count = 0; - struct nfs4_client *clp; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - LIST_HEAD(reaplist); - - if (!nfsd_netns_ready(nn)) - return count; - - spin_lock(&nn->client_lock); - clp = nfsd_find_client(addr, addr_size); - if (clp) - count = nfsd_find_all_delegations(clp, 0, &reaplist); - spin_unlock(&nn->client_lock); - - nfsd_recall_delegations(&reaplist); - return count; -} - -u64 -nfsd_inject_recall_delegations(u64 max) -{ - u64 count = 0; - struct nfs4_client *clp, *next; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - LIST_HEAD(reaplist); - - if (!nfsd_netns_ready(nn)) - return count; - - spin_lock(&nn->client_lock); - list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) { - count += nfsd_find_all_delegations(clp, max - count, &reaplist); - if (max != 0 && ++count >= max) - break; - } - spin_unlock(&nn->client_lock); - nfsd_recall_delegations(&reaplist); - return count; -} -#endif /* CONFIG_NFSD_FAULT_INJECTION */ - /* * Since the lifetime of a delegation isn't limited to that of an open, a * client may quite reasonably hang on to a delegation as long as it has @@ -7803,6 +7958,7 @@ static int nfs4_state_create_net(struct net *net) INIT_LIST_HEAD(&nn->blocked_locks_lru); INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main); + INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper); get_net(net); return 0; @@ -7859,6 +8015,7 @@ nfs4_state_start_net(struct net *net) goto skip_grace; printk(KERN_INFO "NFSD: starting %lld-second grace period (net %x)\n", nn->nfsd4_grace, net->ns.inum); + trace_nfsd_grace_start(nn); queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ); return 0; @@ -7877,22 +8034,12 @@ nfs4_state_start(void) { int ret; - laundry_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, "nfsd4"); - if (laundry_wq == NULL) { - ret = -ENOMEM; - goto out; - } ret = nfsd4_create_callback_queue(); if (ret) - goto out_free_laundry; + return ret; set_max_delegations(); return 0; - -out_free_laundry: - destroy_workqueue(laundry_wq); -out: - return ret; } void @@ -7921,12 +8068,14 @@ nfs4_state_shutdown_net(struct net *net) nfsd4_client_tracking_exit(net); nfs4_state_destroy_net(net); +#ifdef CONFIG_NFSD_V4_2_INTER_SSC + nfsd4_ssc_shutdown_umount(nn); +#endif } void nfs4_state_shutdown(void) { - destroy_workqueue(laundry_wq); nfsd4_destroy_callback_queue(); } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 9761512674a0..bcfeb1a922c0 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -41,6 +41,10 @@ #include <linux/pagemap.h> #include <linux/sunrpc/svcauth_gss.h> #include <linux/sunrpc/addr.h> +#include <linux/xattr.h> +#include <linux/vmalloc.h> + +#include <uapi/linux/xattr.h> #include "idmap.h" #include "acl.h" @@ -52,6 +56,8 @@ #include "pnfs.h" #include "filecache.h" +#include "trace.h" + #ifdef CONFIG_NFSD_V4_SECURITY_LABEL #include <linux/security.h> #endif @@ -88,6 +94,8 @@ check_filename(char *str, int len) if (len == 0) return nfserr_inval; + if (len > NFS4_MAXNAMLEN) + return nfserr_nametoolong; if (isdotent(str, len)) return nfserr_badname; for (i = 0; i < len; i++) @@ -96,122 +104,6 @@ check_filename(char *str, int len) return 0; } -#define DECODE_HEAD \ - __be32 *p; \ - __be32 status -#define DECODE_TAIL \ - status = 0; \ -out: \ - return status; \ -xdr_error: \ - dprintk("NFSD: xdr error (%s:%d)\n", \ - __FILE__, __LINE__); \ - status = nfserr_bad_xdr; \ - goto out - -#define READMEM(x,nbytes) do { \ - x = (char *)p; \ - p += XDR_QUADLEN(nbytes); \ -} while (0) -#define SAVEMEM(x,nbytes) do { \ - if (!(x = (p==argp->tmp || p == argp->tmpp) ? \ - savemem(argp, p, nbytes) : \ - (char *)p)) { \ - dprintk("NFSD: xdr error (%s:%d)\n", \ - __FILE__, __LINE__); \ - goto xdr_error; \ - } \ - p += XDR_QUADLEN(nbytes); \ -} while (0) -#define COPYMEM(x,nbytes) do { \ - memcpy((x), p, nbytes); \ - p += XDR_QUADLEN(nbytes); \ -} while (0) - -/* READ_BUF, read_buf(): nbytes must be <= PAGE_SIZE */ -#define READ_BUF(nbytes) do { \ - if (nbytes <= (u32)((char *)argp->end - (char *)argp->p)) { \ - p = argp->p; \ - argp->p += XDR_QUADLEN(nbytes); \ - } else if (!(p = read_buf(argp, nbytes))) { \ - dprintk("NFSD: xdr error (%s:%d)\n", \ - __FILE__, __LINE__); \ - goto xdr_error; \ - } \ -} while (0) - -static void next_decode_page(struct nfsd4_compoundargs *argp) -{ - argp->p = page_address(argp->pagelist[0]); - argp->pagelist++; - if (argp->pagelen < PAGE_SIZE) { - argp->end = argp->p + XDR_QUADLEN(argp->pagelen); - argp->pagelen = 0; - } else { - argp->end = argp->p + (PAGE_SIZE>>2); - argp->pagelen -= PAGE_SIZE; - } -} - -static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) -{ - /* We want more bytes than seem to be available. - * Maybe we need a new page, maybe we have just run out - */ - unsigned int avail = (char *)argp->end - (char *)argp->p; - __be32 *p; - - if (argp->pagelen == 0) { - struct kvec *vec = &argp->rqstp->rq_arg.tail[0]; - - if (!argp->tail) { - argp->tail = true; - avail = vec->iov_len; - argp->p = vec->iov_base; - argp->end = vec->iov_base + avail; - } - - if (avail < nbytes) - return NULL; - - p = argp->p; - argp->p += XDR_QUADLEN(nbytes); - return p; - } - - if (avail + argp->pagelen < nbytes) - return NULL; - if (avail + PAGE_SIZE < nbytes) /* need more than a page !! */ - return NULL; - /* ok, we can do it with the current plus the next page */ - if (nbytes <= sizeof(argp->tmp)) - p = argp->tmp; - else { - kfree(argp->tmpp); - p = argp->tmpp = kmalloc(nbytes, GFP_KERNEL); - if (!p) - return NULL; - - } - /* - * The following memcpy is safe because read_buf is always - * called with nbytes > avail, and the two cases above both - * guarantee p points to at least nbytes bytes. - */ - memcpy(p, argp->p, avail); - next_decode_page(argp); - memcpy(((char*)p)+avail, argp->p, (nbytes - avail)); - argp->p += XDR_QUADLEN(nbytes - avail); - return p; -} - -static unsigned int compoundargs_bytes_left(struct nfsd4_compoundargs *argp) -{ - unsigned int this = (char *)argp->end - (char *)argp->p; - - return this + argp->pagelen; -} - static int zero_clientid(clientid_t *clid) { return (clid->cl_boot == 0) && (clid->cl_id == 0); @@ -257,80 +149,246 @@ svcxdr_dupstr(struct nfsd4_compoundargs *argp, void *buf, u32 len) return p; } -/** - * savemem - duplicate a chunk of memory for later processing - * @argp: NFSv4 compound argument structure to be freed with - * @p: pointer to be duplicated - * @nbytes: length to be duplicated - * - * Returns a pointer to a copy of @nbytes bytes of memory at @p - * that are preserved until processing of the NFSv4 compound - * operation described by @argp finishes. - */ -static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes) +static void * +svcxdr_savemem(struct nfsd4_compoundargs *argp, __be32 *p, u32 len) { - void *ret; + __be32 *tmp; - ret = svcxdr_tmpalloc(argp, nbytes); - if (!ret) + /* + * The location of the decoded data item is stable, + * so @p is OK to use. This is the common case. + */ + if (p != argp->xdr->scratch.iov_base) + return p; + + tmp = svcxdr_tmpalloc(argp, len); + if (!tmp) return NULL; - memcpy(ret, p, nbytes); - return ret; + memcpy(tmp, p, len); + return tmp; +} + +/* + * NFSv4 basic data type decoders + */ + +/* + * This helper handles variable-length opaques which belong to protocol + * elements that this implementation does not support. + */ +static __be32 +nfsd4_decode_ignored_string(struct nfsd4_compoundargs *argp, u32 maxlen) +{ + u32 len; + + if (xdr_stream_decode_u32(argp->xdr, &len) < 0) + return nfserr_bad_xdr; + if (maxlen && len > maxlen) + return nfserr_bad_xdr; + if (!xdr_inline_decode(argp->xdr, len)) + return nfserr_bad_xdr; + + return nfs_ok; +} + +static __be32 +nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_netobj *o) +{ + __be32 *p; + u32 len; + + if (xdr_stream_decode_u32(argp->xdr, &len) < 0) + return nfserr_bad_xdr; + if (len == 0 || len > NFS4_OPAQUE_LIMIT) + return nfserr_bad_xdr; + p = xdr_inline_decode(argp->xdr, len); + if (!p) + return nfserr_bad_xdr; + o->data = svcxdr_savemem(argp, p, len); + if (!o->data) + return nfserr_jukebox; + o->len = len; + + return nfs_ok; +} + +static __be32 +nfsd4_decode_component4(struct nfsd4_compoundargs *argp, char **namp, u32 *lenp) +{ + __be32 *p, status; + + if (xdr_stream_decode_u32(argp->xdr, lenp) < 0) + return nfserr_bad_xdr; + p = xdr_inline_decode(argp->xdr, *lenp); + if (!p) + return nfserr_bad_xdr; + status = check_filename((char *)p, *lenp); + if (status) + return status; + *namp = svcxdr_savemem(argp, p, *lenp); + if (!*namp) + return nfserr_jukebox; + + return nfs_ok; } static __be32 -nfsd4_decode_time(struct nfsd4_compoundargs *argp, struct timespec64 *tv) +nfsd4_decode_nfstime4(struct nfsd4_compoundargs *argp, struct timespec64 *tv) { - DECODE_HEAD; + __be32 *p; - READ_BUF(12); + p = xdr_inline_decode(argp->xdr, XDR_UNIT * 3); + if (!p) + return nfserr_bad_xdr; p = xdr_decode_hyper(p, &tv->tv_sec); tv->tv_nsec = be32_to_cpup(p++); if (tv->tv_nsec >= (u32)1000000000) return nfserr_inval; + return nfs_ok; +} + +static __be32 +nfsd4_decode_verifier4(struct nfsd4_compoundargs *argp, nfs4_verifier *verf) +{ + __be32 *p; + + p = xdr_inline_decode(argp->xdr, NFS4_VERIFIER_SIZE); + if (!p) + return nfserr_bad_xdr; + memcpy(verf->data, p, sizeof(verf->data)); + return nfs_ok; +} + +/** + * nfsd4_decode_bitmap4 - Decode an NFSv4 bitmap4 + * @argp: NFSv4 compound argument structure + * @bmval: pointer to an array of u32's to decode into + * @bmlen: size of the @bmval array + * + * The server needs to return nfs_ok rather than nfserr_bad_xdr when + * encountering bitmaps containing bits it does not recognize. This + * includes bits in bitmap words past WORDn, where WORDn is the last + * bitmap WORD the implementation currently supports. Thus we are + * careful here to simply ignore bits in bitmap words that this + * implementation has yet to support explicitly. + * + * Return values: + * %nfs_ok: @bmval populated successfully + * %nfserr_bad_xdr: the encoded bitmap was invalid + */ +static __be32 +nfsd4_decode_bitmap4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen) +{ + ssize_t status; - DECODE_TAIL; + status = xdr_stream_decode_uint32_array(argp->xdr, bmval, bmlen); + return status == -EBADMSG ? nfserr_bad_xdr : nfs_ok; } static __be32 -nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) +nfsd4_decode_nfsace4(struct nfsd4_compoundargs *argp, struct nfs4_ace *ace) +{ + __be32 *p, status; + u32 length; + + if (xdr_stream_decode_u32(argp->xdr, &ace->type) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &ace->flag) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &ace->access_mask) < 0) + return nfserr_bad_xdr; + + if (xdr_stream_decode_u32(argp->xdr, &length) < 0) + return nfserr_bad_xdr; + p = xdr_inline_decode(argp->xdr, length); + if (!p) + return nfserr_bad_xdr; + ace->whotype = nfs4_acl_get_whotype((char *)p, length); + if (ace->whotype != NFS4_ACL_WHO_NAMED) + status = nfs_ok; + else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP) + status = nfsd_map_name_to_gid(argp->rqstp, + (char *)p, length, &ace->who_gid); + else + status = nfsd_map_name_to_uid(argp->rqstp, + (char *)p, length, &ace->who_uid); + + return status; +} + +/* A counted array of nfsace4's */ +static noinline __be32 +nfsd4_decode_acl(struct nfsd4_compoundargs *argp, struct nfs4_acl **acl) { - u32 bmlen; - DECODE_HEAD; + struct nfs4_ace *ace; + __be32 status; + u32 count; - bmval[0] = 0; - bmval[1] = 0; - bmval[2] = 0; + if (xdr_stream_decode_u32(argp->xdr, &count) < 0) + return nfserr_bad_xdr; + + if (count > xdr_stream_remaining(argp->xdr) / 20) + /* + * Even with 4-byte names there wouldn't be + * space for that many aces; something fishy is + * going on: + */ + return nfserr_fbig; + + *acl = svcxdr_tmpalloc(argp, nfs4_acl_bytes(count)); + if (*acl == NULL) + return nfserr_jukebox; + + (*acl)->naces = count; + for (ace = (*acl)->aces; ace < (*acl)->aces + count; ace++) { + status = nfsd4_decode_nfsace4(argp, ace); + if (status) + return status; + } + + return nfs_ok; +} + +static noinline __be32 +nfsd4_decode_security_label(struct nfsd4_compoundargs *argp, + struct xdr_netobj *label) +{ + u32 lfs, pi, length; + __be32 *p; - READ_BUF(4); - bmlen = be32_to_cpup(p++); - if (bmlen > 1000) - goto xdr_error; + if (xdr_stream_decode_u32(argp->xdr, &lfs) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &pi) < 0) + return nfserr_bad_xdr; - READ_BUF(bmlen << 2); - if (bmlen > 0) - bmval[0] = be32_to_cpup(p++); - if (bmlen > 1) - bmval[1] = be32_to_cpup(p++); - if (bmlen > 2) - bmval[2] = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &length) < 0) + return nfserr_bad_xdr; + if (length > NFS4_MAXLABELLEN) + return nfserr_badlabel; + p = xdr_inline_decode(argp->xdr, length); + if (!p) + return nfserr_bad_xdr; + label->len = length; + label->data = svcxdr_dupstr(argp, p, length); + if (!label->data) + return nfserr_jukebox; - DECODE_TAIL; + return nfs_ok; } static __be32 -nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, - struct iattr *iattr, struct nfs4_acl **acl, - struct xdr_netobj *label, int *umask) +nfsd4_decode_fattr4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen, + struct iattr *iattr, struct nfs4_acl **acl, + struct xdr_netobj *label, int *umask) { - int expected_len, len = 0; - u32 dummy32; - char *buf; + unsigned int starting_pos; + u32 attrlist4_count; + __be32 *p, status; - DECODE_HEAD; iattr->ia_valid = 0; - if ((status = nfsd4_decode_bitmap(argp, bmval))) - return status; + status = nfsd4_decode_bitmap4(argp, bmval, bmlen); + if (status) + return nfserr_bad_xdr; if (bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0 || bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1 @@ -340,96 +398,69 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, return nfserr_attrnotsupp; } - READ_BUF(4); - expected_len = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &attrlist4_count) < 0) + return nfserr_bad_xdr; + starting_pos = xdr_stream_pos(argp->xdr); if (bmval[0] & FATTR4_WORD0_SIZE) { - READ_BUF(8); - len += 8; - p = xdr_decode_hyper(p, &iattr->ia_size); + u64 size; + + if (xdr_stream_decode_u64(argp->xdr, &size) < 0) + return nfserr_bad_xdr; + iattr->ia_size = size; iattr->ia_valid |= ATTR_SIZE; } if (bmval[0] & FATTR4_WORD0_ACL) { - u32 nace; - struct nfs4_ace *ace; - - READ_BUF(4); len += 4; - nace = be32_to_cpup(p++); - - if (nace > compoundargs_bytes_left(argp)/20) - /* - * Even with 4-byte names there wouldn't be - * space for that many aces; something fishy is - * going on: - */ - return nfserr_fbig; - - *acl = svcxdr_tmpalloc(argp, nfs4_acl_bytes(nace)); - if (*acl == NULL) - return nfserr_jukebox; - - (*acl)->naces = nace; - for (ace = (*acl)->aces; ace < (*acl)->aces + nace; ace++) { - READ_BUF(16); len += 16; - ace->type = be32_to_cpup(p++); - ace->flag = be32_to_cpup(p++); - ace->access_mask = be32_to_cpup(p++); - dummy32 = be32_to_cpup(p++); - READ_BUF(dummy32); - len += XDR_QUADLEN(dummy32) << 2; - READMEM(buf, dummy32); - ace->whotype = nfs4_acl_get_whotype(buf, dummy32); - status = nfs_ok; - if (ace->whotype != NFS4_ACL_WHO_NAMED) - ; - else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP) - status = nfsd_map_name_to_gid(argp->rqstp, - buf, dummy32, &ace->who_gid); - else - status = nfsd_map_name_to_uid(argp->rqstp, - buf, dummy32, &ace->who_uid); - if (status) - return status; - } + status = nfsd4_decode_acl(argp, acl); + if (status) + return status; } else *acl = NULL; if (bmval[1] & FATTR4_WORD1_MODE) { - READ_BUF(4); - len += 4; - iattr->ia_mode = be32_to_cpup(p++); + u32 mode; + + if (xdr_stream_decode_u32(argp->xdr, &mode) < 0) + return nfserr_bad_xdr; + iattr->ia_mode = mode; iattr->ia_mode &= (S_IFMT | S_IALLUGO); iattr->ia_valid |= ATTR_MODE; } if (bmval[1] & FATTR4_WORD1_OWNER) { - READ_BUF(4); - len += 4; - dummy32 = be32_to_cpup(p++); - READ_BUF(dummy32); - len += (XDR_QUADLEN(dummy32) << 2); - READMEM(buf, dummy32); - if ((status = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid))) + u32 length; + + if (xdr_stream_decode_u32(argp->xdr, &length) < 0) + return nfserr_bad_xdr; + p = xdr_inline_decode(argp->xdr, length); + if (!p) + return nfserr_bad_xdr; + status = nfsd_map_name_to_uid(argp->rqstp, (char *)p, length, + &iattr->ia_uid); + if (status) return status; iattr->ia_valid |= ATTR_UID; } if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) { - READ_BUF(4); - len += 4; - dummy32 = be32_to_cpup(p++); - READ_BUF(dummy32); - len += (XDR_QUADLEN(dummy32) << 2); - READMEM(buf, dummy32); - if ((status = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid))) + u32 length; + + if (xdr_stream_decode_u32(argp->xdr, &length) < 0) + return nfserr_bad_xdr; + p = xdr_inline_decode(argp->xdr, length); + if (!p) + return nfserr_bad_xdr; + status = nfsd_map_name_to_gid(argp->rqstp, (char *)p, length, + &iattr->ia_gid); + if (status) return status; iattr->ia_valid |= ATTR_GID; } if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) { - READ_BUF(4); - len += 4; - dummy32 = be32_to_cpup(p++); - switch (dummy32) { + u32 set_it; + + if (xdr_stream_decode_u32(argp->xdr, &set_it) < 0) + return nfserr_bad_xdr; + switch (set_it) { case NFS4_SET_TO_CLIENT_TIME: - len += 12; - status = nfsd4_decode_time(argp, &iattr->ia_atime); + status = nfsd4_decode_nfstime4(argp, &iattr->ia_atime); if (status) return status; iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET); @@ -438,17 +469,26 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, iattr->ia_valid |= ATTR_ATIME; break; default: - goto xdr_error; + return nfserr_bad_xdr; } } + if (bmval[1] & FATTR4_WORD1_TIME_CREATE) { + struct timespec64 ts; + + /* No Linux filesystem supports setting this attribute. */ + bmval[1] &= ~FATTR4_WORD1_TIME_CREATE; + status = nfsd4_decode_nfstime4(argp, &ts); + if (status) + return status; + } if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) { - READ_BUF(4); - len += 4; - dummy32 = be32_to_cpup(p++); - switch (dummy32) { + u32 set_it; + + if (xdr_stream_decode_u32(argp->xdr, &set_it) < 0) + return nfserr_bad_xdr; + switch (set_it) { case NFS4_SET_TO_CLIENT_TIME: - len += 12; - status = nfsd4_decode_time(argp, &iattr->ia_mtime); + status = nfsd4_decode_nfstime4(argp, &iattr->ia_mtime); if (status) return status; iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET); @@ -457,222 +497,331 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, iattr->ia_valid |= ATTR_MTIME; break; default: - goto xdr_error; + return nfserr_bad_xdr; } } - label->len = 0; if (IS_ENABLED(CONFIG_NFSD_V4_SECURITY_LABEL) && bmval[2] & FATTR4_WORD2_SECURITY_LABEL) { - READ_BUF(4); - len += 4; - dummy32 = be32_to_cpup(p++); /* lfs: we don't use it */ - READ_BUF(4); - len += 4; - dummy32 = be32_to_cpup(p++); /* pi: we don't use it either */ - READ_BUF(4); - len += 4; - dummy32 = be32_to_cpup(p++); - READ_BUF(dummy32); - if (dummy32 > NFS4_MAXLABELLEN) - return nfserr_badlabel; - len += (XDR_QUADLEN(dummy32) << 2); - READMEM(buf, dummy32); - label->len = dummy32; - label->data = svcxdr_dupstr(argp, buf, dummy32); - if (!label->data) - return nfserr_jukebox; + status = nfsd4_decode_security_label(argp, label); + if (status) + return status; } if (bmval[2] & FATTR4_WORD2_MODE_UMASK) { + u32 mode, mask; + if (!umask) - goto xdr_error; - READ_BUF(8); - len += 8; - dummy32 = be32_to_cpup(p++); - iattr->ia_mode = dummy32 & (S_IFMT | S_IALLUGO); - dummy32 = be32_to_cpup(p++); - *umask = dummy32 & S_IRWXUGO; + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &mode) < 0) + return nfserr_bad_xdr; + iattr->ia_mode = mode & (S_IFMT | S_IALLUGO); + if (xdr_stream_decode_u32(argp->xdr, &mask) < 0) + return nfserr_bad_xdr; + *umask = mask & S_IRWXUGO; iattr->ia_valid |= ATTR_MODE; } - if (len != expected_len) - goto xdr_error; - DECODE_TAIL; + /* request sanity: did attrlist4 contain the expected number of words? */ + if (attrlist4_count != xdr_stream_pos(argp->xdr) - starting_pos) + return nfserr_bad_xdr; + + return nfs_ok; } static __be32 -nfsd4_decode_stateid(struct nfsd4_compoundargs *argp, stateid_t *sid) +nfsd4_decode_stateid4(struct nfsd4_compoundargs *argp, stateid_t *sid) { - DECODE_HEAD; + __be32 *p; - READ_BUF(sizeof(stateid_t)); + p = xdr_inline_decode(argp->xdr, NFS4_STATEID_SIZE); + if (!p) + return nfserr_bad_xdr; sid->si_generation = be32_to_cpup(p++); - COPYMEM(&sid->si_opaque, sizeof(stateid_opaque_t)); + memcpy(&sid->si_opaque, p, sizeof(sid->si_opaque)); + return nfs_ok; +} + +static __be32 +nfsd4_decode_clientid4(struct nfsd4_compoundargs *argp, clientid_t *clientid) +{ + __be32 *p; + + p = xdr_inline_decode(argp->xdr, sizeof(__be64)); + if (!p) + return nfserr_bad_xdr; + memcpy(clientid, p, sizeof(*clientid)); + return nfs_ok; +} + +static __be32 +nfsd4_decode_state_owner4(struct nfsd4_compoundargs *argp, + clientid_t *clientid, struct xdr_netobj *owner) +{ + __be32 status; + + status = nfsd4_decode_clientid4(argp, clientid); + if (status) + return status; + return nfsd4_decode_opaque(argp, owner); +} + +#ifdef CONFIG_NFSD_PNFS +static __be32 +nfsd4_decode_deviceid4(struct nfsd4_compoundargs *argp, + struct nfsd4_deviceid *devid) +{ + __be32 *p; - DECODE_TAIL; + p = xdr_inline_decode(argp->xdr, NFS4_DEVICEID4_SIZE); + if (!p) + return nfserr_bad_xdr; + memcpy(devid, p, sizeof(*devid)); + return nfs_ok; } static __be32 -nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access) +nfsd4_decode_layoutupdate4(struct nfsd4_compoundargs *argp, + struct nfsd4_layoutcommit *lcp) { - DECODE_HEAD; + if (xdr_stream_decode_u32(argp->xdr, &lcp->lc_layout_type) < 0) + return nfserr_bad_xdr; + if (lcp->lc_layout_type < LAYOUT_NFSV4_1_FILES) + return nfserr_bad_xdr; + if (lcp->lc_layout_type >= LAYOUT_TYPE_MAX) + return nfserr_bad_xdr; - READ_BUF(4); - access->ac_req_access = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &lcp->lc_up_len) < 0) + return nfserr_bad_xdr; + if (lcp->lc_up_len > 0) { + lcp->lc_up_layout = xdr_inline_decode(argp->xdr, lcp->lc_up_len); + if (!lcp->lc_up_layout) + return nfserr_bad_xdr; + } - DECODE_TAIL; + return nfs_ok; } -static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_cb_sec *cbs) +static __be32 +nfsd4_decode_layoutreturn4(struct nfsd4_compoundargs *argp, + struct nfsd4_layoutreturn *lrp) { - DECODE_HEAD; - struct user_namespace *userns = nfsd_user_namespace(argp->rqstp); - u32 dummy, uid, gid; - char *machine_name; - int i; - int nr_secflavs; + __be32 status; + + if (xdr_stream_decode_u32(argp->xdr, &lrp->lr_return_type) < 0) + return nfserr_bad_xdr; + switch (lrp->lr_return_type) { + case RETURN_FILE: + if (xdr_stream_decode_u64(argp->xdr, &lrp->lr_seg.offset) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u64(argp->xdr, &lrp->lr_seg.length) < 0) + return nfserr_bad_xdr; + status = nfsd4_decode_stateid4(argp, &lrp->lr_sid); + if (status) + return status; + if (xdr_stream_decode_u32(argp->xdr, &lrp->lrf_body_len) < 0) + return nfserr_bad_xdr; + if (lrp->lrf_body_len > 0) { + lrp->lrf_body = xdr_inline_decode(argp->xdr, lrp->lrf_body_len); + if (!lrp->lrf_body) + return nfserr_bad_xdr; + } + break; + case RETURN_FSID: + case RETURN_ALL: + lrp->lr_seg.offset = 0; + lrp->lr_seg.length = NFS4_MAX_UINT64; + break; + default: + return nfserr_bad_xdr; + } + + return nfs_ok; +} + +#endif /* CONFIG_NFSD_PNFS */ + +static __be32 +nfsd4_decode_sessionid4(struct nfsd4_compoundargs *argp, + struct nfs4_sessionid *sessionid) +{ + __be32 *p; + + p = xdr_inline_decode(argp->xdr, NFS4_MAX_SESSIONID_LEN); + if (!p) + return nfserr_bad_xdr; + memcpy(sessionid->data, p, sizeof(sessionid->data)); + return nfs_ok; +} + +/* Defined in Appendix A of RFC 5531 */ +static __be32 +nfsd4_decode_authsys_parms(struct nfsd4_compoundargs *argp, + struct nfsd4_cb_sec *cbs) +{ + u32 stamp, gidcount, uid, gid; + __be32 *p, status; + + if (xdr_stream_decode_u32(argp->xdr, &stamp) < 0) + return nfserr_bad_xdr; + /* machine name */ + status = nfsd4_decode_ignored_string(argp, 255); + if (status) + return status; + if (xdr_stream_decode_u32(argp->xdr, &uid) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &gid) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &gidcount) < 0) + return nfserr_bad_xdr; + if (gidcount > 16) + return nfserr_bad_xdr; + p = xdr_inline_decode(argp->xdr, gidcount << 2); + if (!p) + return nfserr_bad_xdr; + if (cbs->flavor == (u32)(-1)) { + struct user_namespace *userns = nfsd_user_namespace(argp->rqstp); + + kuid_t kuid = make_kuid(userns, uid); + kgid_t kgid = make_kgid(userns, gid); + if (uid_valid(kuid) && gid_valid(kgid)) { + cbs->uid = kuid; + cbs->gid = kgid; + cbs->flavor = RPC_AUTH_UNIX; + } else { + dprintk("RPC_AUTH_UNIX with invalid uid or gid, ignoring!\n"); + } + } + + return nfs_ok; +} + +static __be32 +nfsd4_decode_gss_cb_handles4(struct nfsd4_compoundargs *argp, + struct nfsd4_cb_sec *cbs) +{ + __be32 status; + u32 service; + + dprintk("RPC_AUTH_GSS callback secflavor not supported!\n"); + + if (xdr_stream_decode_u32(argp->xdr, &service) < 0) + return nfserr_bad_xdr; + if (service < RPC_GSS_SVC_NONE || service > RPC_GSS_SVC_PRIVACY) + return nfserr_bad_xdr; + /* gcbp_handle_from_server */ + status = nfsd4_decode_ignored_string(argp, 0); + if (status) + return status; + /* gcbp_handle_from_client */ + status = nfsd4_decode_ignored_string(argp, 0); + if (status) + return status; + + return nfs_ok; +} + +/* a counted array of callback_sec_parms4 items */ +static __be32 +nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_cb_sec *cbs) +{ + u32 i, secflavor, nr_secflavs; + __be32 status; /* callback_sec_params4 */ - READ_BUF(4); - nr_secflavs = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &nr_secflavs) < 0) + return nfserr_bad_xdr; if (nr_secflavs) cbs->flavor = (u32)(-1); else /* Is this legal? Be generous, take it to mean AUTH_NONE: */ cbs->flavor = 0; + for (i = 0; i < nr_secflavs; ++i) { - READ_BUF(4); - dummy = be32_to_cpup(p++); - switch (dummy) { + if (xdr_stream_decode_u32(argp->xdr, &secflavor) < 0) + return nfserr_bad_xdr; + switch (secflavor) { case RPC_AUTH_NULL: - /* Nothing to read */ + /* void */ if (cbs->flavor == (u32)(-1)) cbs->flavor = RPC_AUTH_NULL; break; case RPC_AUTH_UNIX: - READ_BUF(8); - /* stamp */ - dummy = be32_to_cpup(p++); - - /* machine name */ - dummy = be32_to_cpup(p++); - READ_BUF(dummy); - SAVEMEM(machine_name, dummy); - - /* uid, gid */ - READ_BUF(8); - uid = be32_to_cpup(p++); - gid = be32_to_cpup(p++); - - /* more gids */ - READ_BUF(4); - dummy = be32_to_cpup(p++); - READ_BUF(dummy * 4); - if (cbs->flavor == (u32)(-1)) { - kuid_t kuid = make_kuid(userns, uid); - kgid_t kgid = make_kgid(userns, gid); - if (uid_valid(kuid) && gid_valid(kgid)) { - cbs->uid = kuid; - cbs->gid = kgid; - cbs->flavor = RPC_AUTH_UNIX; - } else { - dprintk("RPC_AUTH_UNIX with invalid" - "uid or gid ignoring!\n"); - } - } + status = nfsd4_decode_authsys_parms(argp, cbs); + if (status) + return status; break; case RPC_AUTH_GSS: - dprintk("RPC_AUTH_GSS callback secflavor " - "not supported!\n"); - READ_BUF(8); - /* gcbp_service */ - dummy = be32_to_cpup(p++); - /* gcbp_handle_from_server */ - dummy = be32_to_cpup(p++); - READ_BUF(dummy); - p += XDR_QUADLEN(dummy); - /* gcbp_handle_from_client */ - READ_BUF(4); - dummy = be32_to_cpup(p++); - READ_BUF(dummy); + status = nfsd4_decode_gss_cb_handles4(argp, cbs); + if (status) + return status; break; default: - dprintk("Illegal callback secflavor\n"); return nfserr_inval; } } - DECODE_TAIL; -} -static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, struct nfsd4_backchannel_ctl *bc) -{ - DECODE_HEAD; + return nfs_ok; +} - READ_BUF(4); - bc->bc_cb_program = be32_to_cpup(p++); - nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec); - DECODE_TAIL; -} +/* + * NFSv4 operation argument decoders + */ -static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts) +static __be32 +nfsd4_decode_access(struct nfsd4_compoundargs *argp, + struct nfsd4_access *access) { - DECODE_HEAD; - - READ_BUF(NFS4_MAX_SESSIONID_LEN + 8); - COPYMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN); - bcts->dir = be32_to_cpup(p++); - /* XXX: skipping ctsa_use_conn_in_rdma_mode. Perhaps Tom Tucker - * could help us figure out we should be using it. */ - DECODE_TAIL; + if (xdr_stream_decode_u32(argp->xdr, &access->ac_req_access) < 0) + return nfserr_bad_xdr; + return nfs_ok; } static __be32 nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) { - DECODE_HEAD; - - READ_BUF(4); - close->cl_seqid = be32_to_cpup(p++); - return nfsd4_decode_stateid(argp, &close->cl_stateid); - - DECODE_TAIL; + if (xdr_stream_decode_u32(argp->xdr, &close->cl_seqid) < 0) + return nfserr_bad_xdr; + return nfsd4_decode_stateid4(argp, &close->cl_stateid); } static __be32 nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit) { - DECODE_HEAD; - - READ_BUF(12); - p = xdr_decode_hyper(p, &commit->co_offset); - commit->co_count = be32_to_cpup(p++); - - DECODE_TAIL; + if (xdr_stream_decode_u64(argp->xdr, &commit->co_offset) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &commit->co_count) < 0) + return nfserr_bad_xdr; + memset(&commit->co_verf, 0, sizeof(commit->co_verf)); + return nfs_ok; } static __be32 nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create) { - DECODE_HEAD; + __be32 *p, status; - READ_BUF(4); - create->cr_type = be32_to_cpup(p++); + memset(create, 0, sizeof(*create)); + if (xdr_stream_decode_u32(argp->xdr, &create->cr_type) < 0) + return nfserr_bad_xdr; switch (create->cr_type) { case NF4LNK: - READ_BUF(4); - create->cr_datalen = be32_to_cpup(p++); - READ_BUF(create->cr_datalen); + if (xdr_stream_decode_u32(argp->xdr, &create->cr_datalen) < 0) + return nfserr_bad_xdr; + p = xdr_inline_decode(argp->xdr, create->cr_datalen); + if (!p) + return nfserr_bad_xdr; create->cr_data = svcxdr_dupstr(argp, p, create->cr_datalen); if (!create->cr_data) return nfserr_jukebox; break; case NF4BLK: case NF4CHR: - READ_BUF(8); - create->cr_specdata1 = be32_to_cpup(p++); - create->cr_specdata2 = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &create->cr_specdata1) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &create->cr_specdata2) < 0) + return nfserr_bad_xdr; break; case NF4SOCK: case NF4FIFO: @@ -680,151 +829,214 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create default: break; } - - READ_BUF(4); - create->cr_namelen = be32_to_cpup(p++); - READ_BUF(create->cr_namelen); - SAVEMEM(create->cr_name, create->cr_namelen); - if ((status = check_filename(create->cr_name, create->cr_namelen))) + status = nfsd4_decode_component4(argp, &create->cr_name, + &create->cr_namelen); + if (status) return status; - - status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr, - &create->cr_acl, &create->cr_label, - &create->cr_umask); + status = nfsd4_decode_fattr4(argp, create->cr_bmval, + ARRAY_SIZE(create->cr_bmval), + &create->cr_iattr, &create->cr_acl, + &create->cr_label, &create->cr_umask); if (status) - goto out; + return status; - DECODE_TAIL; + return nfs_ok; } static inline __be32 nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr) { - return nfsd4_decode_stateid(argp, &dr->dr_stateid); + return nfsd4_decode_stateid4(argp, &dr->dr_stateid); } static inline __be32 nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, struct nfsd4_getattr *getattr) { - return nfsd4_decode_bitmap(argp, getattr->ga_bmval); + memset(getattr, 0, sizeof(*getattr)); + return nfsd4_decode_bitmap4(argp, getattr->ga_bmval, + ARRAY_SIZE(getattr->ga_bmval)); } static __be32 nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link) { - DECODE_HEAD; + memset(link, 0, sizeof(*link)); + return nfsd4_decode_component4(argp, &link->li_name, &link->li_namelen); +} + +static __be32 +nfsd4_decode_open_to_lock_owner4(struct nfsd4_compoundargs *argp, + struct nfsd4_lock *lock) +{ + __be32 status; + + if (xdr_stream_decode_u32(argp->xdr, &lock->lk_new_open_seqid) < 0) + return nfserr_bad_xdr; + status = nfsd4_decode_stateid4(argp, &lock->lk_new_open_stateid); + if (status) + return status; + if (xdr_stream_decode_u32(argp->xdr, &lock->lk_new_lock_seqid) < 0) + return nfserr_bad_xdr; + return nfsd4_decode_state_owner4(argp, &lock->lk_new_clientid, + &lock->lk_new_owner); +} + +static __be32 +nfsd4_decode_exist_lock_owner4(struct nfsd4_compoundargs *argp, + struct nfsd4_lock *lock) +{ + __be32 status; - READ_BUF(4); - link->li_namelen = be32_to_cpup(p++); - READ_BUF(link->li_namelen); - SAVEMEM(link->li_name, link->li_namelen); - if ((status = check_filename(link->li_name, link->li_namelen))) + status = nfsd4_decode_stateid4(argp, &lock->lk_old_lock_stateid); + if (status) return status; + if (xdr_stream_decode_u32(argp->xdr, &lock->lk_old_lock_seqid) < 0) + return nfserr_bad_xdr; - DECODE_TAIL; + return nfs_ok; } static __be32 -nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) +nfsd4_decode_locker4(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) { - DECODE_HEAD; + if (xdr_stream_decode_bool(argp->xdr, &lock->lk_is_new) < 0) + return nfserr_bad_xdr; + if (lock->lk_is_new) + return nfsd4_decode_open_to_lock_owner4(argp, lock); + return nfsd4_decode_exist_lock_owner4(argp, lock); +} - /* - * type, reclaim(boolean), offset, length, new_lock_owner(boolean) - */ - READ_BUF(28); - lock->lk_type = be32_to_cpup(p++); +static __be32 +nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) +{ + memset(lock, 0, sizeof(*lock)); + if (xdr_stream_decode_u32(argp->xdr, &lock->lk_type) < 0) + return nfserr_bad_xdr; if ((lock->lk_type < NFS4_READ_LT) || (lock->lk_type > NFS4_WRITEW_LT)) - goto xdr_error; - lock->lk_reclaim = be32_to_cpup(p++); - p = xdr_decode_hyper(p, &lock->lk_offset); - p = xdr_decode_hyper(p, &lock->lk_length); - lock->lk_is_new = be32_to_cpup(p++); - - if (lock->lk_is_new) { - READ_BUF(4); - lock->lk_new_open_seqid = be32_to_cpup(p++); - status = nfsd4_decode_stateid(argp, &lock->lk_new_open_stateid); - if (status) - return status; - READ_BUF(8 + sizeof(clientid_t)); - lock->lk_new_lock_seqid = be32_to_cpup(p++); - COPYMEM(&lock->lk_new_clientid, sizeof(clientid_t)); - lock->lk_new_owner.len = be32_to_cpup(p++); - READ_BUF(lock->lk_new_owner.len); - READMEM(lock->lk_new_owner.data, lock->lk_new_owner.len); - } else { - status = nfsd4_decode_stateid(argp, &lock->lk_old_lock_stateid); - if (status) - return status; - READ_BUF(4); - lock->lk_old_lock_seqid = be32_to_cpup(p++); - } - - DECODE_TAIL; + return nfserr_bad_xdr; + if (xdr_stream_decode_bool(argp->xdr, &lock->lk_reclaim) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u64(argp->xdr, &lock->lk_offset) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u64(argp->xdr, &lock->lk_length) < 0) + return nfserr_bad_xdr; + return nfsd4_decode_locker4(argp, lock); } static __be32 nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt) { - DECODE_HEAD; - - READ_BUF(32); - lockt->lt_type = be32_to_cpup(p++); - if((lockt->lt_type < NFS4_READ_LT) || (lockt->lt_type > NFS4_WRITEW_LT)) - goto xdr_error; - p = xdr_decode_hyper(p, &lockt->lt_offset); - p = xdr_decode_hyper(p, &lockt->lt_length); - COPYMEM(&lockt->lt_clientid, 8); - lockt->lt_owner.len = be32_to_cpup(p++); - READ_BUF(lockt->lt_owner.len); - READMEM(lockt->lt_owner.data, lockt->lt_owner.len); - - DECODE_TAIL; + memset(lockt, 0, sizeof(*lockt)); + if (xdr_stream_decode_u32(argp->xdr, &lockt->lt_type) < 0) + return nfserr_bad_xdr; + if ((lockt->lt_type < NFS4_READ_LT) || (lockt->lt_type > NFS4_WRITEW_LT)) + return nfserr_bad_xdr; + if (xdr_stream_decode_u64(argp->xdr, &lockt->lt_offset) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u64(argp->xdr, &lockt->lt_length) < 0) + return nfserr_bad_xdr; + return nfsd4_decode_state_owner4(argp, &lockt->lt_clientid, + &lockt->lt_owner); } static __be32 nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku) { - DECODE_HEAD; + __be32 status; - READ_BUF(8); - locku->lu_type = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &locku->lu_type) < 0) + return nfserr_bad_xdr; if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT)) - goto xdr_error; - locku->lu_seqid = be32_to_cpup(p++); - status = nfsd4_decode_stateid(argp, &locku->lu_stateid); + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &locku->lu_seqid) < 0) + return nfserr_bad_xdr; + status = nfsd4_decode_stateid4(argp, &locku->lu_stateid); if (status) return status; - READ_BUF(16); - p = xdr_decode_hyper(p, &locku->lu_offset); - p = xdr_decode_hyper(p, &locku->lu_length); + if (xdr_stream_decode_u64(argp->xdr, &locku->lu_offset) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u64(argp->xdr, &locku->lu_length) < 0) + return nfserr_bad_xdr; - DECODE_TAIL; + return nfs_ok; } static __be32 nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup) { - DECODE_HEAD; + return nfsd4_decode_component4(argp, &lookup->lo_name, &lookup->lo_len); +} - READ_BUF(4); - lookup->lo_len = be32_to_cpup(p++); - READ_BUF(lookup->lo_len); - SAVEMEM(lookup->lo_name, lookup->lo_len); - if ((status = check_filename(lookup->lo_name, lookup->lo_len))) - return status; +static __be32 +nfsd4_decode_createhow4(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) +{ + __be32 status; + + if (xdr_stream_decode_u32(argp->xdr, &open->op_createmode) < 0) + return nfserr_bad_xdr; + switch (open->op_createmode) { + case NFS4_CREATE_UNCHECKED: + case NFS4_CREATE_GUARDED: + status = nfsd4_decode_fattr4(argp, open->op_bmval, + ARRAY_SIZE(open->op_bmval), + &open->op_iattr, &open->op_acl, + &open->op_label, &open->op_umask); + if (status) + return status; + break; + case NFS4_CREATE_EXCLUSIVE: + status = nfsd4_decode_verifier4(argp, &open->op_verf); + if (status) + return status; + break; + case NFS4_CREATE_EXCLUSIVE4_1: + if (argp->minorversion < 1) + return nfserr_bad_xdr; + status = nfsd4_decode_verifier4(argp, &open->op_verf); + if (status) + return status; + status = nfsd4_decode_fattr4(argp, open->op_bmval, + ARRAY_SIZE(open->op_bmval), + &open->op_iattr, &open->op_acl, + &open->op_label, &open->op_umask); + if (status) + return status; + break; + default: + return nfserr_bad_xdr; + } + + return nfs_ok; +} + +static __be32 +nfsd4_decode_openflag4(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) +{ + __be32 status; + + if (xdr_stream_decode_u32(argp->xdr, &open->op_create) < 0) + return nfserr_bad_xdr; + switch (open->op_create) { + case NFS4_OPEN_NOCREATE: + break; + case NFS4_OPEN_CREATE: + status = nfsd4_decode_createhow4(argp, open); + if (status) + return status; + break; + default: + return nfserr_bad_xdr; + } - DECODE_TAIL; + return nfs_ok; } static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *share_access, u32 *deleg_want, u32 *deleg_when) { - __be32 *p; u32 w; - READ_BUF(4); - w = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &w) < 0) + return nfserr_bad_xdr; *share_access = w & NFS4_SHARE_ACCESS_MASK; *deleg_want = w & NFS4_SHARE_WANT_MASK; if (deleg_when) @@ -867,206 +1079,153 @@ static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *sh NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED): return nfs_ok; } -xdr_error: return nfserr_bad_xdr; } static __be32 nfsd4_decode_share_deny(struct nfsd4_compoundargs *argp, u32 *x) { - __be32 *p; - - READ_BUF(4); - *x = be32_to_cpup(p++); - /* Note: unlinke access bits, deny bits may be zero. */ - if (*x & ~NFS4_SHARE_DENY_BOTH) + if (xdr_stream_decode_u32(argp->xdr, x) < 0) return nfserr_bad_xdr; - return nfs_ok; -xdr_error: - return nfserr_bad_xdr; -} - -static __be32 nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_netobj *o) -{ - __be32 *p; - - READ_BUF(4); - o->len = be32_to_cpup(p++); - - if (o->len == 0 || o->len > NFS4_OPAQUE_LIMIT) + /* Note: unlike access bits, deny bits may be zero. */ + if (*x & ~NFS4_SHARE_DENY_BOTH) return nfserr_bad_xdr; - READ_BUF(o->len); - SAVEMEM(o->data, o->len); return nfs_ok; -xdr_error: - return nfserr_bad_xdr; } static __be32 -nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) +nfsd4_decode_open_claim4(struct nfsd4_compoundargs *argp, + struct nfsd4_open *open) { - DECODE_HEAD; - u32 dummy; - - memset(open->op_bmval, 0, sizeof(open->op_bmval)); - open->op_iattr.ia_valid = 0; - open->op_openowner = NULL; - - open->op_xdr_error = 0; - /* seqid, share_access, share_deny, clientid, ownerlen */ - READ_BUF(4); - open->op_seqid = be32_to_cpup(p++); - /* decode, yet ignore deleg_when until supported */ - status = nfsd4_decode_share_access(argp, &open->op_share_access, - &open->op_deleg_want, &dummy); - if (status) - goto xdr_error; - status = nfsd4_decode_share_deny(argp, &open->op_share_deny); - if (status) - goto xdr_error; - READ_BUF(sizeof(clientid_t)); - COPYMEM(&open->op_clientid, sizeof(clientid_t)); - status = nfsd4_decode_opaque(argp, &open->op_owner); - if (status) - goto xdr_error; - READ_BUF(4); - open->op_create = be32_to_cpup(p++); - switch (open->op_create) { - case NFS4_OPEN_NOCREATE: - break; - case NFS4_OPEN_CREATE: - READ_BUF(4); - open->op_createmode = be32_to_cpup(p++); - switch (open->op_createmode) { - case NFS4_CREATE_UNCHECKED: - case NFS4_CREATE_GUARDED: - status = nfsd4_decode_fattr(argp, open->op_bmval, - &open->op_iattr, &open->op_acl, &open->op_label, - &open->op_umask); - if (status) - goto out; - break; - case NFS4_CREATE_EXCLUSIVE: - READ_BUF(NFS4_VERIFIER_SIZE); - COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE); - break; - case NFS4_CREATE_EXCLUSIVE4_1: - if (argp->minorversion < 1) - goto xdr_error; - READ_BUF(NFS4_VERIFIER_SIZE); - COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE); - status = nfsd4_decode_fattr(argp, open->op_bmval, - &open->op_iattr, &open->op_acl, &open->op_label, - &open->op_umask); - if (status) - goto out; - break; - default: - goto xdr_error; - } - break; - default: - goto xdr_error; - } + __be32 status; - /* open_claim */ - READ_BUF(4); - open->op_claim_type = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &open->op_claim_type) < 0) + return nfserr_bad_xdr; switch (open->op_claim_type) { case NFS4_OPEN_CLAIM_NULL: case NFS4_OPEN_CLAIM_DELEGATE_PREV: - READ_BUF(4); - open->op_fname.len = be32_to_cpup(p++); - READ_BUF(open->op_fname.len); - SAVEMEM(open->op_fname.data, open->op_fname.len); - if ((status = check_filename(open->op_fname.data, open->op_fname.len))) + status = nfsd4_decode_component4(argp, &open->op_fname, + &open->op_fnamelen); + if (status) return status; break; case NFS4_OPEN_CLAIM_PREVIOUS: - READ_BUF(4); - open->op_delegate_type = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &open->op_delegate_type) < 0) + return nfserr_bad_xdr; break; case NFS4_OPEN_CLAIM_DELEGATE_CUR: - status = nfsd4_decode_stateid(argp, &open->op_delegate_stateid); + status = nfsd4_decode_stateid4(argp, &open->op_delegate_stateid); if (status) return status; - READ_BUF(4); - open->op_fname.len = be32_to_cpup(p++); - READ_BUF(open->op_fname.len); - SAVEMEM(open->op_fname.data, open->op_fname.len); - if ((status = check_filename(open->op_fname.data, open->op_fname.len))) + status = nfsd4_decode_component4(argp, &open->op_fname, + &open->op_fnamelen); + if (status) return status; break; case NFS4_OPEN_CLAIM_FH: case NFS4_OPEN_CLAIM_DELEG_PREV_FH: if (argp->minorversion < 1) - goto xdr_error; + return nfserr_bad_xdr; /* void */ break; case NFS4_OPEN_CLAIM_DELEG_CUR_FH: if (argp->minorversion < 1) - goto xdr_error; - status = nfsd4_decode_stateid(argp, &open->op_delegate_stateid); + return nfserr_bad_xdr; + status = nfsd4_decode_stateid4(argp, &open->op_delegate_stateid); if (status) return status; break; default: - goto xdr_error; + return nfserr_bad_xdr; } - DECODE_TAIL; + return nfs_ok; +} + +static __be32 +nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) +{ + __be32 status; + u32 dummy; + + memset(open, 0, sizeof(*open)); + + if (xdr_stream_decode_u32(argp->xdr, &open->op_seqid) < 0) + return nfserr_bad_xdr; + /* deleg_want is ignored */ + status = nfsd4_decode_share_access(argp, &open->op_share_access, + &open->op_deleg_want, &dummy); + if (status) + return status; + status = nfsd4_decode_share_deny(argp, &open->op_share_deny); + if (status) + return status; + status = nfsd4_decode_state_owner4(argp, &open->op_clientid, + &open->op_owner); + if (status) + return status; + status = nfsd4_decode_openflag4(argp, open); + if (status) + return status; + return nfsd4_decode_open_claim4(argp, open); } static __be32 nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_confirm *open_conf) { - DECODE_HEAD; + __be32 status; if (argp->minorversion >= 1) return nfserr_notsupp; - status = nfsd4_decode_stateid(argp, &open_conf->oc_req_stateid); + status = nfsd4_decode_stateid4(argp, &open_conf->oc_req_stateid); if (status) return status; - READ_BUF(4); - open_conf->oc_seqid = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &open_conf->oc_seqid) < 0) + return nfserr_bad_xdr; - DECODE_TAIL; + memset(&open_conf->oc_resp_stateid, 0, + sizeof(open_conf->oc_resp_stateid)); + return nfs_ok; } static __be32 nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_downgrade *open_down) { - DECODE_HEAD; - - status = nfsd4_decode_stateid(argp, &open_down->od_stateid); + __be32 status; + + memset(open_down, 0, sizeof(*open_down)); + status = nfsd4_decode_stateid4(argp, &open_down->od_stateid); if (status) return status; - READ_BUF(4); - open_down->od_seqid = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &open_down->od_seqid) < 0) + return nfserr_bad_xdr; + /* deleg_want is ignored */ status = nfsd4_decode_share_access(argp, &open_down->od_share_access, &open_down->od_deleg_want, NULL); if (status) return status; - status = nfsd4_decode_share_deny(argp, &open_down->od_share_deny); - if (status) - return status; - DECODE_TAIL; + return nfsd4_decode_share_deny(argp, &open_down->od_share_deny); } static __be32 nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh) { - DECODE_HEAD; + __be32 *p; - READ_BUF(4); - putfh->pf_fhlen = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &putfh->pf_fhlen) < 0) + return nfserr_bad_xdr; if (putfh->pf_fhlen > NFS4_FHSIZE) - goto xdr_error; - READ_BUF(putfh->pf_fhlen); - SAVEMEM(putfh->pf_fhval, putfh->pf_fhlen); + return nfserr_bad_xdr; + p = xdr_inline_decode(argp->xdr, putfh->pf_fhlen); + if (!p) + return nfserr_bad_xdr; + putfh->pf_fhval = svcxdr_savemem(argp, p, putfh->pf_fhlen); + if (!putfh->pf_fhval) + return nfserr_jukebox; - DECODE_TAIL; + putfh->no_verify = false; + return nfs_ok; } static __be32 @@ -1080,109 +1239,73 @@ nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, void *p) static __be32 nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) { - DECODE_HEAD; + __be32 status; - status = nfsd4_decode_stateid(argp, &read->rd_stateid); + memset(read, 0, sizeof(*read)); + status = nfsd4_decode_stateid4(argp, &read->rd_stateid); if (status) return status; - READ_BUF(12); - p = xdr_decode_hyper(p, &read->rd_offset); - read->rd_length = be32_to_cpup(p++); + if (xdr_stream_decode_u64(argp->xdr, &read->rd_offset) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &read->rd_length) < 0) + return nfserr_bad_xdr; - DECODE_TAIL; + return nfs_ok; } static __be32 nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *readdir) { - DECODE_HEAD; + __be32 status; - READ_BUF(24); - p = xdr_decode_hyper(p, &readdir->rd_cookie); - COPYMEM(readdir->rd_verf.data, sizeof(readdir->rd_verf.data)); - readdir->rd_dircount = be32_to_cpup(p++); - readdir->rd_maxcount = be32_to_cpup(p++); - if ((status = nfsd4_decode_bitmap(argp, readdir->rd_bmval))) - goto out; + memset(readdir, 0, sizeof(*readdir)); + if (xdr_stream_decode_u64(argp->xdr, &readdir->rd_cookie) < 0) + return nfserr_bad_xdr; + status = nfsd4_decode_verifier4(argp, &readdir->rd_verf); + if (status) + return status; + if (xdr_stream_decode_u32(argp->xdr, &readdir->rd_dircount) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &readdir->rd_maxcount) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_uint32_array(argp->xdr, readdir->rd_bmval, + ARRAY_SIZE(readdir->rd_bmval)) < 0) + return nfserr_bad_xdr; - DECODE_TAIL; + return nfs_ok; } static __be32 nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove) { - DECODE_HEAD; - - READ_BUF(4); - remove->rm_namelen = be32_to_cpup(p++); - READ_BUF(remove->rm_namelen); - SAVEMEM(remove->rm_name, remove->rm_namelen); - if ((status = check_filename(remove->rm_name, remove->rm_namelen))) - return status; - - DECODE_TAIL; + memset(&remove->rm_cinfo, 0, sizeof(remove->rm_cinfo)); + return nfsd4_decode_component4(argp, &remove->rm_name, &remove->rm_namelen); } static __be32 nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename) { - DECODE_HEAD; - - READ_BUF(4); - rename->rn_snamelen = be32_to_cpup(p++); - READ_BUF(rename->rn_snamelen); - SAVEMEM(rename->rn_sname, rename->rn_snamelen); - READ_BUF(4); - rename->rn_tnamelen = be32_to_cpup(p++); - READ_BUF(rename->rn_tnamelen); - SAVEMEM(rename->rn_tname, rename->rn_tnamelen); - if ((status = check_filename(rename->rn_sname, rename->rn_snamelen))) - return status; - if ((status = check_filename(rename->rn_tname, rename->rn_tnamelen))) - return status; + __be32 status; - DECODE_TAIL; + memset(rename, 0, sizeof(*rename)); + status = nfsd4_decode_component4(argp, &rename->rn_sname, &rename->rn_snamelen); + if (status) + return status; + return nfsd4_decode_component4(argp, &rename->rn_tname, &rename->rn_tnamelen); } static __be32 nfsd4_decode_renew(struct nfsd4_compoundargs *argp, clientid_t *clientid) { - DECODE_HEAD; - - if (argp->minorversion >= 1) - return nfserr_notsupp; - - READ_BUF(sizeof(clientid_t)); - COPYMEM(clientid, sizeof(clientid_t)); - - DECODE_TAIL; + return nfsd4_decode_clientid4(argp, clientid); } static __be32 nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp, struct nfsd4_secinfo *secinfo) { - DECODE_HEAD; - - READ_BUF(4); - secinfo->si_namelen = be32_to_cpup(p++); - READ_BUF(secinfo->si_namelen); - SAVEMEM(secinfo->si_name, secinfo->si_namelen); - status = check_filename(secinfo->si_name, secinfo->si_namelen); - if (status) - return status; - DECODE_TAIL; -} - -static __be32 -nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp, - struct nfsd4_secinfo_no_name *sin) -{ - DECODE_HEAD; - - READ_BUF(4); - sin->sin_style = be32_to_cpup(p++); - DECODE_TAIL; + secinfo->si_exp = NULL; + return nfsd4_decode_component4(argp, &secinfo->si_name, &secinfo->si_namelen); } static __be32 @@ -1190,388 +1313,384 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta { __be32 status; - status = nfsd4_decode_stateid(argp, &setattr->sa_stateid); + memset(setattr, 0, sizeof(*setattr)); + status = nfsd4_decode_stateid4(argp, &setattr->sa_stateid); if (status) return status; - return nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr, - &setattr->sa_acl, &setattr->sa_label, NULL); + return nfsd4_decode_fattr4(argp, setattr->sa_bmval, + ARRAY_SIZE(setattr->sa_bmval), + &setattr->sa_iattr, &setattr->sa_acl, + &setattr->sa_label, NULL); } static __be32 nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid *setclientid) { - DECODE_HEAD; + __be32 *p, status; + + memset(setclientid, 0, sizeof(*setclientid)); if (argp->minorversion >= 1) return nfserr_notsupp; - READ_BUF(NFS4_VERIFIER_SIZE); - COPYMEM(setclientid->se_verf.data, NFS4_VERIFIER_SIZE); - + status = nfsd4_decode_verifier4(argp, &setclientid->se_verf); + if (status) + return status; status = nfsd4_decode_opaque(argp, &setclientid->se_name); if (status) + return status; + if (xdr_stream_decode_u32(argp->xdr, &setclientid->se_callback_prog) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &setclientid->se_callback_netid_len) < 0) + return nfserr_bad_xdr; + p = xdr_inline_decode(argp->xdr, setclientid->se_callback_netid_len); + if (!p) return nfserr_bad_xdr; - READ_BUF(8); - setclientid->se_callback_prog = be32_to_cpup(p++); - setclientid->se_callback_netid_len = be32_to_cpup(p++); - READ_BUF(setclientid->se_callback_netid_len); - SAVEMEM(setclientid->se_callback_netid_val, setclientid->se_callback_netid_len); - READ_BUF(4); - setclientid->se_callback_addr_len = be32_to_cpup(p++); + setclientid->se_callback_netid_val = svcxdr_savemem(argp, p, + setclientid->se_callback_netid_len); + if (!setclientid->se_callback_netid_val) + return nfserr_jukebox; - READ_BUF(setclientid->se_callback_addr_len); - SAVEMEM(setclientid->se_callback_addr_val, setclientid->se_callback_addr_len); - READ_BUF(4); - setclientid->se_callback_ident = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &setclientid->se_callback_addr_len) < 0) + return nfserr_bad_xdr; + p = xdr_inline_decode(argp->xdr, setclientid->se_callback_addr_len); + if (!p) + return nfserr_bad_xdr; + setclientid->se_callback_addr_val = svcxdr_savemem(argp, p, + setclientid->se_callback_addr_len); + if (!setclientid->se_callback_addr_val) + return nfserr_jukebox; + if (xdr_stream_decode_u32(argp->xdr, &setclientid->se_callback_ident) < 0) + return nfserr_bad_xdr; - DECODE_TAIL; + return nfs_ok; } static __be32 nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid_confirm *scd_c) { - DECODE_HEAD; + __be32 status; if (argp->minorversion >= 1) return nfserr_notsupp; - READ_BUF(8 + NFS4_VERIFIER_SIZE); - COPYMEM(&scd_c->sc_clientid, 8); - COPYMEM(&scd_c->sc_confirm, NFS4_VERIFIER_SIZE); - - DECODE_TAIL; + status = nfsd4_decode_clientid4(argp, &scd_c->sc_clientid); + if (status) + return status; + return nfsd4_decode_verifier4(argp, &scd_c->sc_confirm); } /* Also used for NVERIFY */ static __be32 nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify) { - DECODE_HEAD; + __be32 *p, status; - if ((status = nfsd4_decode_bitmap(argp, verify->ve_bmval))) - goto out; + memset(verify, 0, sizeof(*verify)); + + status = nfsd4_decode_bitmap4(argp, verify->ve_bmval, + ARRAY_SIZE(verify->ve_bmval)); + if (status) + return status; /* For convenience's sake, we compare raw xdr'd attributes in * nfsd4_proc_verify */ - READ_BUF(4); - verify->ve_attrlen = be32_to_cpup(p++); - READ_BUF(verify->ve_attrlen); - SAVEMEM(verify->ve_attrval, verify->ve_attrlen); + if (xdr_stream_decode_u32(argp->xdr, &verify->ve_attrlen) < 0) + return nfserr_bad_xdr; + p = xdr_inline_decode(argp->xdr, verify->ve_attrlen); + if (!p) + return nfserr_bad_xdr; + verify->ve_attrval = svcxdr_savemem(argp, p, verify->ve_attrlen); + if (!verify->ve_attrval) + return nfserr_jukebox; - DECODE_TAIL; + return nfs_ok; } static __be32 nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) { - int avail; - int len; - DECODE_HEAD; + __be32 status; - status = nfsd4_decode_stateid(argp, &write->wr_stateid); + status = nfsd4_decode_stateid4(argp, &write->wr_stateid); if (status) return status; - READ_BUF(16); - p = xdr_decode_hyper(p, &write->wr_offset); - write->wr_stable_how = be32_to_cpup(p++); + if (xdr_stream_decode_u64(argp->xdr, &write->wr_offset) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &write->wr_stable_how) < 0) + return nfserr_bad_xdr; if (write->wr_stable_how > NFS_FILE_SYNC) - goto xdr_error; - write->wr_buflen = be32_to_cpup(p++); - - /* Sorry .. no magic macros for this.. * - * READ_BUF(write->wr_buflen); - * SAVEMEM(write->wr_buf, write->wr_buflen); - */ - avail = (char*)argp->end - (char*)argp->p; - if (avail + argp->pagelen < write->wr_buflen) { - dprintk("NFSD: xdr error (%s:%d)\n", - __FILE__, __LINE__); - goto xdr_error; - } - write->wr_head.iov_base = p; - write->wr_head.iov_len = avail; - write->wr_pagelist = argp->pagelist; - - len = XDR_QUADLEN(write->wr_buflen) << 2; - if (len >= avail) { - int pages; - - len -= avail; - - pages = len >> PAGE_SHIFT; - argp->pagelist += pages; - argp->pagelen -= pages * PAGE_SIZE; - len -= pages * PAGE_SIZE; - - next_decode_page(argp); - } - argp->p += XDR_QUADLEN(len); + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &write->wr_buflen) < 0) + return nfserr_bad_xdr; + if (!xdr_stream_subsegment(argp->xdr, &write->wr_payload, write->wr_buflen)) + return nfserr_bad_xdr; - DECODE_TAIL; + write->wr_bytes_written = 0; + write->wr_how_written = 0; + memset(&write->wr_verifier, 0, sizeof(write->wr_verifier)); + return nfs_ok; } static __be32 nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_release_lockowner *rlockowner) { - DECODE_HEAD; + __be32 status; if (argp->minorversion >= 1) return nfserr_notsupp; - READ_BUF(12); - COPYMEM(&rlockowner->rl_clientid, sizeof(clientid_t)); - rlockowner->rl_owner.len = be32_to_cpup(p++); - READ_BUF(rlockowner->rl_owner.len); - READMEM(rlockowner->rl_owner.data, rlockowner->rl_owner.len); + status = nfsd4_decode_state_owner4(argp, &rlockowner->rl_clientid, + &rlockowner->rl_owner); + if (status) + return status; if (argp->minorversion && !zero_clientid(&rlockowner->rl_clientid)) return nfserr_inval; - DECODE_TAIL; + + return nfs_ok; +} + +static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, struct nfsd4_backchannel_ctl *bc) +{ + memset(bc, 0, sizeof(*bc)); + if (xdr_stream_decode_u32(argp->xdr, &bc->bc_cb_program) < 0) + return nfserr_bad_xdr; + return nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec); +} + +static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts) +{ + u32 use_conn_in_rdma_mode; + __be32 status; + + memset(bcts, 0, sizeof(*bcts)); + status = nfsd4_decode_sessionid4(argp, &bcts->sessionid); + if (status) + return status; + if (xdr_stream_decode_u32(argp->xdr, &bcts->dir) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &use_conn_in_rdma_mode) < 0) + return nfserr_bad_xdr; + + return nfs_ok; } static __be32 -nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, - struct nfsd4_exchange_id *exid) +nfsd4_decode_state_protect_ops(struct nfsd4_compoundargs *argp, + struct nfsd4_exchange_id *exid) { - int dummy, tmp; - DECODE_HEAD; + __be32 status; + + status = nfsd4_decode_bitmap4(argp, exid->spo_must_enforce, + ARRAY_SIZE(exid->spo_must_enforce)); + if (status) + return nfserr_bad_xdr; + status = nfsd4_decode_bitmap4(argp, exid->spo_must_allow, + ARRAY_SIZE(exid->spo_must_allow)); + if (status) + return nfserr_bad_xdr; - READ_BUF(NFS4_VERIFIER_SIZE); - COPYMEM(exid->verifier.data, NFS4_VERIFIER_SIZE); + return nfs_ok; +} - status = nfsd4_decode_opaque(argp, &exid->clname); +/* + * This implementation currently does not support SP4_SSV. + * This decoder simply skips over these arguments. + */ +static noinline __be32 +nfsd4_decode_ssv_sp_parms(struct nfsd4_compoundargs *argp, + struct nfsd4_exchange_id *exid) +{ + u32 count, window, num_gss_handles; + __be32 status; + + /* ssp_ops */ + status = nfsd4_decode_state_protect_ops(argp, exid); if (status) + return status; + + /* ssp_hash_algs<> */ + if (xdr_stream_decode_u32(argp->xdr, &count) < 0) + return nfserr_bad_xdr; + while (count--) { + status = nfsd4_decode_ignored_string(argp, 0); + if (status) + return status; + } + + /* ssp_encr_algs<> */ + if (xdr_stream_decode_u32(argp->xdr, &count) < 0) + return nfserr_bad_xdr; + while (count--) { + status = nfsd4_decode_ignored_string(argp, 0); + if (status) + return status; + } + + if (xdr_stream_decode_u32(argp->xdr, &window) < 0) return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &num_gss_handles) < 0) + return nfserr_bad_xdr; + + return nfs_ok; +} - READ_BUF(4); - exid->flags = be32_to_cpup(p++); +static __be32 +nfsd4_decode_state_protect4_a(struct nfsd4_compoundargs *argp, + struct nfsd4_exchange_id *exid) +{ + __be32 status; - /* Ignore state_protect4_a */ - READ_BUF(4); - exid->spa_how = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &exid->spa_how) < 0) + return nfserr_bad_xdr; switch (exid->spa_how) { case SP4_NONE: break; case SP4_MACH_CRED: - /* spo_must_enforce */ - status = nfsd4_decode_bitmap(argp, - exid->spo_must_enforce); - if (status) - goto out; - /* spo_must_allow */ - status = nfsd4_decode_bitmap(argp, exid->spo_must_allow); + status = nfsd4_decode_state_protect_ops(argp, exid); if (status) - goto out; + return status; break; case SP4_SSV: - /* ssp_ops */ - READ_BUF(4); - dummy = be32_to_cpup(p++); - READ_BUF(dummy * 4); - p += dummy; - - READ_BUF(4); - dummy = be32_to_cpup(p++); - READ_BUF(dummy * 4); - p += dummy; - - /* ssp_hash_algs<> */ - READ_BUF(4); - tmp = be32_to_cpup(p++); - while (tmp--) { - READ_BUF(4); - dummy = be32_to_cpup(p++); - READ_BUF(dummy); - p += XDR_QUADLEN(dummy); - } - - /* ssp_encr_algs<> */ - READ_BUF(4); - tmp = be32_to_cpup(p++); - while (tmp--) { - READ_BUF(4); - dummy = be32_to_cpup(p++); - READ_BUF(dummy); - p += XDR_QUADLEN(dummy); - } - - /* ignore ssp_window and ssp_num_gss_handles: */ - READ_BUF(8); + status = nfsd4_decode_ssv_sp_parms(argp, exid); + if (status) + return status; break; default: - goto xdr_error; + return nfserr_bad_xdr; } - READ_BUF(4); /* nfs_impl_id4 array length */ - dummy = be32_to_cpup(p++); + return nfs_ok; +} - if (dummy > 1) - goto xdr_error; +static __be32 +nfsd4_decode_nfs_impl_id4(struct nfsd4_compoundargs *argp, + struct nfsd4_exchange_id *exid) +{ + __be32 status; + u32 count; - if (dummy == 1) { + if (xdr_stream_decode_u32(argp->xdr, &count) < 0) + return nfserr_bad_xdr; + switch (count) { + case 0: + break; + case 1: + /* Note that RFC 8881 places no length limit on + * nii_domain, but this implementation permits no + * more than NFS4_OPAQUE_LIMIT bytes */ status = nfsd4_decode_opaque(argp, &exid->nii_domain); if (status) - goto xdr_error; - - /* nii_name */ + return status; + /* Note that RFC 8881 places no length limit on + * nii_name, but this implementation permits no + * more than NFS4_OPAQUE_LIMIT bytes */ status = nfsd4_decode_opaque(argp, &exid->nii_name); if (status) - goto xdr_error; - - /* nii_date */ - status = nfsd4_decode_time(argp, &exid->nii_time); + return status; + status = nfsd4_decode_nfstime4(argp, &exid->nii_time); if (status) - goto xdr_error; + return status; + break; + default: + return nfserr_bad_xdr; } - DECODE_TAIL; -} -static __be32 -nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, - struct nfsd4_create_session *sess) -{ - DECODE_HEAD; - - READ_BUF(16); - COPYMEM(&sess->clientid, 8); - sess->seqid = be32_to_cpup(p++); - sess->flags = be32_to_cpup(p++); - - /* Fore channel attrs */ - READ_BUF(28); - p++; /* headerpadsz is always 0 */ - sess->fore_channel.maxreq_sz = be32_to_cpup(p++); - sess->fore_channel.maxresp_sz = be32_to_cpup(p++); - sess->fore_channel.maxresp_cached = be32_to_cpup(p++); - sess->fore_channel.maxops = be32_to_cpup(p++); - sess->fore_channel.maxreqs = be32_to_cpup(p++); - sess->fore_channel.nr_rdma_attrs = be32_to_cpup(p++); - if (sess->fore_channel.nr_rdma_attrs == 1) { - READ_BUF(4); - sess->fore_channel.rdma_attrs = be32_to_cpup(p++); - } else if (sess->fore_channel.nr_rdma_attrs > 1) { - dprintk("Too many fore channel attr bitmaps!\n"); - goto xdr_error; - } - - /* Back channel attrs */ - READ_BUF(28); - p++; /* headerpadsz is always 0 */ - sess->back_channel.maxreq_sz = be32_to_cpup(p++); - sess->back_channel.maxresp_sz = be32_to_cpup(p++); - sess->back_channel.maxresp_cached = be32_to_cpup(p++); - sess->back_channel.maxops = be32_to_cpup(p++); - sess->back_channel.maxreqs = be32_to_cpup(p++); - sess->back_channel.nr_rdma_attrs = be32_to_cpup(p++); - if (sess->back_channel.nr_rdma_attrs == 1) { - READ_BUF(4); - sess->back_channel.rdma_attrs = be32_to_cpup(p++); - } else if (sess->back_channel.nr_rdma_attrs > 1) { - dprintk("Too many back channel attr bitmaps!\n"); - goto xdr_error; - } - - READ_BUF(4); - sess->callback_prog = be32_to_cpup(p++); - nfsd4_decode_cb_sec(argp, &sess->cb_sec); - DECODE_TAIL; + return nfs_ok; } static __be32 -nfsd4_decode_destroy_session(struct nfsd4_compoundargs *argp, - struct nfsd4_destroy_session *destroy_session) +nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp, + struct nfsd4_exchange_id *exid) { - DECODE_HEAD; - READ_BUF(NFS4_MAX_SESSIONID_LEN); - COPYMEM(destroy_session->sessionid.data, NFS4_MAX_SESSIONID_LEN); + __be32 status; - DECODE_TAIL; + memset(exid, 0, sizeof(*exid)); + status = nfsd4_decode_verifier4(argp, &exid->verifier); + if (status) + return status; + status = nfsd4_decode_opaque(argp, &exid->clname); + if (status) + return status; + if (xdr_stream_decode_u32(argp->xdr, &exid->flags) < 0) + return nfserr_bad_xdr; + status = nfsd4_decode_state_protect4_a(argp, exid); + if (status) + return status; + return nfsd4_decode_nfs_impl_id4(argp, exid); } static __be32 -nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp, - struct nfsd4_free_stateid *free_stateid) +nfsd4_decode_channel_attrs4(struct nfsd4_compoundargs *argp, + struct nfsd4_channel_attrs *ca) { - DECODE_HEAD; - - READ_BUF(sizeof(stateid_t)); - free_stateid->fr_stateid.si_generation = be32_to_cpup(p++); - COPYMEM(&free_stateid->fr_stateid.si_opaque, sizeof(stateid_opaque_t)); - - DECODE_TAIL; -} + __be32 *p; -static __be32 -nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, - struct nfsd4_sequence *seq) -{ - DECODE_HEAD; + p = xdr_inline_decode(argp->xdr, XDR_UNIT * 7); + if (!p) + return nfserr_bad_xdr; - READ_BUF(NFS4_MAX_SESSIONID_LEN + 16); - COPYMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN); - seq->seqid = be32_to_cpup(p++); - seq->slotid = be32_to_cpup(p++); - seq->maxslots = be32_to_cpup(p++); - seq->cachethis = be32_to_cpup(p++); + /* headerpadsz is ignored */ + p++; + ca->maxreq_sz = be32_to_cpup(p++); + ca->maxresp_sz = be32_to_cpup(p++); + ca->maxresp_cached = be32_to_cpup(p++); + ca->maxops = be32_to_cpup(p++); + ca->maxreqs = be32_to_cpup(p++); + ca->nr_rdma_attrs = be32_to_cpup(p); + switch (ca->nr_rdma_attrs) { + case 0: + break; + case 1: + if (xdr_stream_decode_u32(argp->xdr, &ca->rdma_attrs) < 0) + return nfserr_bad_xdr; + break; + default: + return nfserr_bad_xdr; + } - DECODE_TAIL; + return nfs_ok; } static __be32 -nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_stateid *test_stateid) +nfsd4_decode_create_session(struct nfsd4_compoundargs *argp, + struct nfsd4_create_session *sess) { - int i; - __be32 *p, status; - struct nfsd4_test_stateid_id *stateid; - - READ_BUF(4); - test_stateid->ts_num_ids = ntohl(*p++); - - INIT_LIST_HEAD(&test_stateid->ts_stateid_list); - - for (i = 0; i < test_stateid->ts_num_ids; i++) { - stateid = svcxdr_tmpalloc(argp, sizeof(*stateid)); - if (!stateid) { - status = nfserrno(-ENOMEM); - goto out; - } - - INIT_LIST_HEAD(&stateid->ts_id_list); - list_add_tail(&stateid->ts_id_list, &test_stateid->ts_stateid_list); - - status = nfsd4_decode_stateid(argp, &stateid->ts_id_stateid); - if (status) - goto out; - } + __be32 status; - status = 0; -out: - return status; -xdr_error: - dprintk("NFSD: xdr error (%s:%d)\n", __FILE__, __LINE__); - status = nfserr_bad_xdr; - goto out; + memset(sess, 0, sizeof(*sess)); + status = nfsd4_decode_clientid4(argp, &sess->clientid); + if (status) + return status; + if (xdr_stream_decode_u32(argp->xdr, &sess->seqid) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &sess->flags) < 0) + return nfserr_bad_xdr; + status = nfsd4_decode_channel_attrs4(argp, &sess->fore_channel); + if (status) + return status; + status = nfsd4_decode_channel_attrs4(argp, &sess->back_channel); + if (status) + return status; + if (xdr_stream_decode_u32(argp->xdr, &sess->callback_prog) < 0) + return nfserr_bad_xdr; + return nfsd4_decode_cb_sec(argp, &sess->cb_sec); } -static __be32 nfsd4_decode_destroy_clientid(struct nfsd4_compoundargs *argp, struct nfsd4_destroy_clientid *dc) +static __be32 +nfsd4_decode_destroy_session(struct nfsd4_compoundargs *argp, + struct nfsd4_destroy_session *destroy_session) { - DECODE_HEAD; - - READ_BUF(8); - COPYMEM(&dc->clientid, 8); - - DECODE_TAIL; + return nfsd4_decode_sessionid4(argp, &destroy_session->sessionid); } -static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, struct nfsd4_reclaim_complete *rc) +static __be32 +nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp, + struct nfsd4_free_stateid *free_stateid) { - DECODE_HEAD; - - READ_BUF(4); - rc->rca_one_fs = be32_to_cpup(p++); - - DECODE_TAIL; + return nfsd4_decode_stateid4(argp, &free_stateid->fr_stateid); } #ifdef CONFIG_NFSD_PNFS @@ -1579,244 +1698,276 @@ static __be32 nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp, struct nfsd4_getdeviceinfo *gdev) { - DECODE_HEAD; - u32 num, i; - - READ_BUF(sizeof(struct nfsd4_deviceid) + 3 * 4); - COPYMEM(&gdev->gd_devid, sizeof(struct nfsd4_deviceid)); - gdev->gd_layout_type = be32_to_cpup(p++); - gdev->gd_maxcount = be32_to_cpup(p++); - num = be32_to_cpup(p++); - if (num) { - if (num > 1000) - goto xdr_error; - READ_BUF(4 * num); - gdev->gd_notify_types = be32_to_cpup(p++); - for (i = 1; i < num; i++) { - if (be32_to_cpup(p++)) { - status = nfserr_inval; - goto out; - } - } - } - DECODE_TAIL; -} - -static __be32 -nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp, - struct nfsd4_layoutget *lgp) -{ - DECODE_HEAD; - - READ_BUF(36); - lgp->lg_signal = be32_to_cpup(p++); - lgp->lg_layout_type = be32_to_cpup(p++); - lgp->lg_seg.iomode = be32_to_cpup(p++); - p = xdr_decode_hyper(p, &lgp->lg_seg.offset); - p = xdr_decode_hyper(p, &lgp->lg_seg.length); - p = xdr_decode_hyper(p, &lgp->lg_minlength); + __be32 status; - status = nfsd4_decode_stateid(argp, &lgp->lg_sid); + memset(gdev, 0, sizeof(*gdev)); + status = nfsd4_decode_deviceid4(argp, &gdev->gd_devid); if (status) return status; + if (xdr_stream_decode_u32(argp->xdr, &gdev->gd_layout_type) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &gdev->gd_maxcount) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_uint32_array(argp->xdr, + &gdev->gd_notify_types, 1) < 0) + return nfserr_bad_xdr; - READ_BUF(4); - lgp->lg_maxcount = be32_to_cpup(p++); - - DECODE_TAIL; + return nfs_ok; } static __be32 nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp, - struct nfsd4_layoutcommit *lcp) + struct nfsd4_layoutcommit *lcp) { - DECODE_HEAD; - u32 timechange; - - READ_BUF(20); - p = xdr_decode_hyper(p, &lcp->lc_seg.offset); - p = xdr_decode_hyper(p, &lcp->lc_seg.length); - lcp->lc_reclaim = be32_to_cpup(p++); + __be32 *p, status; - status = nfsd4_decode_stateid(argp, &lcp->lc_sid); + memset(lcp, 0, sizeof(*lcp)); + if (xdr_stream_decode_u64(argp->xdr, &lcp->lc_seg.offset) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u64(argp->xdr, &lcp->lc_seg.length) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_bool(argp->xdr, &lcp->lc_reclaim) < 0) + return nfserr_bad_xdr; + status = nfsd4_decode_stateid4(argp, &lcp->lc_sid); if (status) return status; - - READ_BUF(4); - lcp->lc_newoffset = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &lcp->lc_newoffset) < 0) + return nfserr_bad_xdr; if (lcp->lc_newoffset) { - READ_BUF(8); - p = xdr_decode_hyper(p, &lcp->lc_last_wr); + if (xdr_stream_decode_u64(argp->xdr, &lcp->lc_last_wr) < 0) + return nfserr_bad_xdr; } else lcp->lc_last_wr = 0; - READ_BUF(4); - timechange = be32_to_cpup(p++); - if (timechange) { - status = nfsd4_decode_time(argp, &lcp->lc_mtime); + p = xdr_inline_decode(argp->xdr, XDR_UNIT); + if (!p) + return nfserr_bad_xdr; + if (xdr_item_is_present(p)) { + status = nfsd4_decode_nfstime4(argp, &lcp->lc_mtime); if (status) return status; } else { lcp->lc_mtime.tv_nsec = UTIME_NOW; } - READ_BUF(8); - lcp->lc_layout_type = be32_to_cpup(p++); + return nfsd4_decode_layoutupdate4(argp, lcp); +} - /* - * Save the layout update in XDR format and let the layout driver deal - * with it later. - */ - lcp->lc_up_len = be32_to_cpup(p++); - if (lcp->lc_up_len > 0) { - READ_BUF(lcp->lc_up_len); - READMEM(lcp->lc_up_layout, lcp->lc_up_len); - } +static __be32 +nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp, + struct nfsd4_layoutget *lgp) +{ + __be32 status; + + memset(lgp, 0, sizeof(*lgp)); + if (xdr_stream_decode_u32(argp->xdr, &lgp->lg_signal) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &lgp->lg_layout_type) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &lgp->lg_seg.iomode) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u64(argp->xdr, &lgp->lg_seg.offset) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u64(argp->xdr, &lgp->lg_seg.length) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u64(argp->xdr, &lgp->lg_minlength) < 0) + return nfserr_bad_xdr; + status = nfsd4_decode_stateid4(argp, &lgp->lg_sid); + if (status) + return status; + if (xdr_stream_decode_u32(argp->xdr, &lgp->lg_maxcount) < 0) + return nfserr_bad_xdr; - DECODE_TAIL; + return nfs_ok; } static __be32 nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp, struct nfsd4_layoutreturn *lrp) { - DECODE_HEAD; - - READ_BUF(16); - lrp->lr_reclaim = be32_to_cpup(p++); - lrp->lr_layout_type = be32_to_cpup(p++); - lrp->lr_seg.iomode = be32_to_cpup(p++); - lrp->lr_return_type = be32_to_cpup(p++); - if (lrp->lr_return_type == RETURN_FILE) { - READ_BUF(16); - p = xdr_decode_hyper(p, &lrp->lr_seg.offset); - p = xdr_decode_hyper(p, &lrp->lr_seg.length); - - status = nfsd4_decode_stateid(argp, &lrp->lr_sid); - if (status) - return status; + memset(lrp, 0, sizeof(*lrp)); + if (xdr_stream_decode_bool(argp->xdr, &lrp->lr_reclaim) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &lrp->lr_layout_type) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &lrp->lr_seg.iomode) < 0) + return nfserr_bad_xdr; + return nfsd4_decode_layoutreturn4(argp, lrp); +} +#endif /* CONFIG_NFSD_PNFS */ - READ_BUF(4); - lrp->lrf_body_len = be32_to_cpup(p++); - if (lrp->lrf_body_len > 0) { - READ_BUF(lrp->lrf_body_len); - READMEM(lrp->lrf_body, lrp->lrf_body_len); - } - } else { - lrp->lr_seg.offset = 0; - lrp->lr_seg.length = NFS4_MAX_UINT64; - } +static __be32 nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp, + struct nfsd4_secinfo_no_name *sin) +{ + if (xdr_stream_decode_u32(argp->xdr, &sin->sin_style) < 0) + return nfserr_bad_xdr; - DECODE_TAIL; + sin->sin_exp = NULL; + return nfs_ok; } -#endif /* CONFIG_NFSD_PNFS */ static __be32 -nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp, - struct nfsd4_fallocate *fallocate) +nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, + struct nfsd4_sequence *seq) { - DECODE_HEAD; + __be32 *p, status; - status = nfsd4_decode_stateid(argp, &fallocate->falloc_stateid); + status = nfsd4_decode_sessionid4(argp, &seq->sessionid); if (status) return status; + p = xdr_inline_decode(argp->xdr, XDR_UNIT * 4); + if (!p) + return nfserr_bad_xdr; + seq->seqid = be32_to_cpup(p++); + seq->slotid = be32_to_cpup(p++); + seq->maxslots = be32_to_cpup(p++); + seq->cachethis = be32_to_cpup(p); + + seq->status_flags = 0; + return nfs_ok; +} + +static __be32 +nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_stateid *test_stateid) +{ + struct nfsd4_test_stateid_id *stateid; + __be32 status; + u32 i; + + memset(test_stateid, 0, sizeof(*test_stateid)); + if (xdr_stream_decode_u32(argp->xdr, &test_stateid->ts_num_ids) < 0) + return nfserr_bad_xdr; + + INIT_LIST_HEAD(&test_stateid->ts_stateid_list); + for (i = 0; i < test_stateid->ts_num_ids; i++) { + stateid = svcxdr_tmpalloc(argp, sizeof(*stateid)); + if (!stateid) + return nfserr_jukebox; + INIT_LIST_HEAD(&stateid->ts_id_list); + list_add_tail(&stateid->ts_id_list, &test_stateid->ts_stateid_list); + status = nfsd4_decode_stateid4(argp, &stateid->ts_id_stateid); + if (status) + return status; + } + + return nfs_ok; +} - READ_BUF(16); - p = xdr_decode_hyper(p, &fallocate->falloc_offset); - xdr_decode_hyper(p, &fallocate->falloc_length); +static __be32 nfsd4_decode_destroy_clientid(struct nfsd4_compoundargs *argp, + struct nfsd4_destroy_clientid *dc) +{ + return nfsd4_decode_clientid4(argp, &dc->clientid); +} - DECODE_TAIL; +static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, + struct nfsd4_reclaim_complete *rc) +{ + if (xdr_stream_decode_bool(argp->xdr, &rc->rca_one_fs) < 0) + return nfserr_bad_xdr; + return nfs_ok; } static __be32 -nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone) +nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp, + struct nfsd4_fallocate *fallocate) { - DECODE_HEAD; + __be32 status; - status = nfsd4_decode_stateid(argp, &clone->cl_src_stateid); - if (status) - return status; - status = nfsd4_decode_stateid(argp, &clone->cl_dst_stateid); + status = nfsd4_decode_stateid4(argp, &fallocate->falloc_stateid); if (status) return status; + if (xdr_stream_decode_u64(argp->xdr, &fallocate->falloc_offset) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u64(argp->xdr, &fallocate->falloc_length) < 0) + return nfserr_bad_xdr; - READ_BUF(8 + 8 + 8); - p = xdr_decode_hyper(p, &clone->cl_src_pos); - p = xdr_decode_hyper(p, &clone->cl_dst_pos); - p = xdr_decode_hyper(p, &clone->cl_count); - DECODE_TAIL; + return nfs_ok; } static __be32 nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp, struct nl4_server *ns) { - DECODE_HEAD; struct nfs42_netaddr *naddr; + __be32 *p; - READ_BUF(4); - ns->nl4_type = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &ns->nl4_type) < 0) + return nfserr_bad_xdr; /* currently support for 1 inter-server source server */ switch (ns->nl4_type) { case NL4_NETADDR: naddr = &ns->u.nl4_addr; - READ_BUF(4); - naddr->netid_len = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &naddr->netid_len) < 0) + return nfserr_bad_xdr; if (naddr->netid_len > RPCBIND_MAXNETIDLEN) - goto xdr_error; + return nfserr_bad_xdr; - READ_BUF(naddr->netid_len + 4); /* 4 for uaddr len */ - COPYMEM(naddr->netid, naddr->netid_len); + p = xdr_inline_decode(argp->xdr, naddr->netid_len); + if (!p) + return nfserr_bad_xdr; + memcpy(naddr->netid, p, naddr->netid_len); - naddr->addr_len = be32_to_cpup(p++); + if (xdr_stream_decode_u32(argp->xdr, &naddr->addr_len) < 0) + return nfserr_bad_xdr; if (naddr->addr_len > RPCBIND_MAXUADDRLEN) - goto xdr_error; + return nfserr_bad_xdr; - READ_BUF(naddr->addr_len); - COPYMEM(naddr->addr, naddr->addr_len); + p = xdr_inline_decode(argp->xdr, naddr->addr_len); + if (!p) + return nfserr_bad_xdr; + memcpy(naddr->addr, p, naddr->addr_len); break; default: - goto xdr_error; + return nfserr_bad_xdr; } - DECODE_TAIL; + + return nfs_ok; } static __be32 nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) { - DECODE_HEAD; + u32 consecutive, i, count, sync; struct nl4_server *ns_dummy; - int i, count; + __be32 status; - status = nfsd4_decode_stateid(argp, ©->cp_src_stateid); + memset(copy, 0, sizeof(*copy)); + status = nfsd4_decode_stateid4(argp, ©->cp_src_stateid); if (status) return status; - status = nfsd4_decode_stateid(argp, ©->cp_dst_stateid); + status = nfsd4_decode_stateid4(argp, ©->cp_dst_stateid); if (status) return status; + if (xdr_stream_decode_u64(argp->xdr, ©->cp_src_pos) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u64(argp->xdr, ©->cp_dst_pos) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u64(argp->xdr, ©->cp_count) < 0) + return nfserr_bad_xdr; + /* ca_consecutive: we always do consecutive copies */ + if (xdr_stream_decode_u32(argp->xdr, &consecutive) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_bool(argp->xdr, &sync) < 0) + return nfserr_bad_xdr; + nfsd4_copy_set_sync(copy, sync); - READ_BUF(8 + 8 + 8 + 4 + 4 + 4); - p = xdr_decode_hyper(p, ©->cp_src_pos); - p = xdr_decode_hyper(p, ©->cp_dst_pos); - p = xdr_decode_hyper(p, ©->cp_count); - p++; /* ca_consecutive: we always do consecutive copies */ - copy->cp_synchronous = be32_to_cpup(p++); - - count = be32_to_cpup(p++); - - copy->cp_intra = false; + if (xdr_stream_decode_u32(argp->xdr, &count) < 0) + return nfserr_bad_xdr; + copy->cp_src = svcxdr_tmpalloc(argp, sizeof(*copy->cp_src)); + if (copy->cp_src == NULL) + return nfserr_jukebox; if (count == 0) { /* intra-server copy */ - copy->cp_intra = true; - goto intra; + __set_bit(NFSD4_COPY_F_INTRA, ©->cp_flags); + return nfs_ok; } - /* decode all the supplied server addresses but use first */ - status = nfsd4_decode_nl4_server(argp, ©->cp_src); + /* decode all the supplied server addresses but use only the first */ + status = nfsd4_decode_nl4_server(argp, copy->cp_src); if (status) return status; ns_dummy = kmalloc(sizeof(struct nl4_server), GFP_KERNEL); if (ns_dummy == NULL) - return nfserrno(-ENOMEM); + return nfserr_jukebox; for (i = 0; i < count - 1; i++) { status = nfsd4_decode_nl4_server(argp, ns_dummy); if (status) { @@ -1825,44 +1976,279 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) } } kfree(ns_dummy); -intra: - DECODE_TAIL; + return nfs_ok; +} + +static __be32 +nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp, + struct nfsd4_copy_notify *cn) +{ + __be32 status; + + memset(cn, 0, sizeof(*cn)); + cn->cpn_src = svcxdr_tmpalloc(argp, sizeof(*cn->cpn_src)); + if (cn->cpn_src == NULL) + return nfserr_jukebox; + cn->cpn_dst = svcxdr_tmpalloc(argp, sizeof(*cn->cpn_dst)); + if (cn->cpn_dst == NULL) + return nfserr_jukebox; + + status = nfsd4_decode_stateid4(argp, &cn->cpn_src_stateid); + if (status) + return status; + return nfsd4_decode_nl4_server(argp, cn->cpn_dst); } static __be32 nfsd4_decode_offload_status(struct nfsd4_compoundargs *argp, struct nfsd4_offload_status *os) { - return nfsd4_decode_stateid(argp, &os->stateid); + os->count = 0; + os->status = 0; + return nfsd4_decode_stateid4(argp, &os->stateid); } static __be32 -nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp, - struct nfsd4_copy_notify *cn) +nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) { - int status; + __be32 status; - status = nfsd4_decode_stateid(argp, &cn->cpn_src_stateid); + status = nfsd4_decode_stateid4(argp, &seek->seek_stateid); if (status) return status; - return nfsd4_decode_nl4_server(argp, &cn->cpn_dst); + if (xdr_stream_decode_u64(argp->xdr, &seek->seek_offset) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u32(argp->xdr, &seek->seek_whence) < 0) + return nfserr_bad_xdr; + + seek->seek_eof = 0; + seek->seek_pos = 0; + return nfs_ok; } static __be32 -nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) +nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone) +{ + __be32 status; + + status = nfsd4_decode_stateid4(argp, &clone->cl_src_stateid); + if (status) + return status; + status = nfsd4_decode_stateid4(argp, &clone->cl_dst_stateid); + if (status) + return status; + if (xdr_stream_decode_u64(argp->xdr, &clone->cl_src_pos) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u64(argp->xdr, &clone->cl_dst_pos) < 0) + return nfserr_bad_xdr; + if (xdr_stream_decode_u64(argp->xdr, &clone->cl_count) < 0) + return nfserr_bad_xdr; + + return nfs_ok; +} + +/* + * XDR data that is more than PAGE_SIZE in size is normally part of a + * read or write. However, the size of extended attributes is limited + * by the maximum request size, and then further limited by the underlying + * filesystem limits. This can exceed PAGE_SIZE (currently, XATTR_SIZE_MAX + * is 64k). Since there is no kvec- or page-based interface to xattrs, + * and we're not dealing with contiguous pages, we need to do some copying. + */ + +/* + * Decode data into buffer. + */ +static __be32 +nfsd4_vbuf_from_vector(struct nfsd4_compoundargs *argp, struct xdr_buf *xdr, + char **bufp, u32 buflen) +{ + struct page **pages = xdr->pages; + struct kvec *head = xdr->head; + char *tmp, *dp; + u32 len; + + if (buflen <= head->iov_len) { + /* + * We're in luck, the head has enough space. Just return + * the head, no need for copying. + */ + *bufp = head->iov_base; + return 0; + } + + tmp = svcxdr_tmpalloc(argp, buflen); + if (tmp == NULL) + return nfserr_jukebox; + + dp = tmp; + memcpy(dp, head->iov_base, head->iov_len); + buflen -= head->iov_len; + dp += head->iov_len; + + while (buflen > 0) { + len = min_t(u32, buflen, PAGE_SIZE); + memcpy(dp, page_address(*pages), len); + + buflen -= len; + dp += len; + pages++; + } + + *bufp = tmp; + return 0; +} + +/* + * Get a user extended attribute name from the XDR buffer. + * It will not have the "user." prefix, so prepend it. + * Lastly, check for nul characters in the name. + */ +static __be32 +nfsd4_decode_xattr_name(struct nfsd4_compoundargs *argp, char **namep) +{ + char *name, *sp, *dp; + u32 namelen, cnt; + __be32 *p; + + if (xdr_stream_decode_u32(argp->xdr, &namelen) < 0) + return nfserr_bad_xdr; + if (namelen > (XATTR_NAME_MAX - XATTR_USER_PREFIX_LEN)) + return nfserr_nametoolong; + if (namelen == 0) + return nfserr_bad_xdr; + p = xdr_inline_decode(argp->xdr, namelen); + if (!p) + return nfserr_bad_xdr; + name = svcxdr_tmpalloc(argp, namelen + XATTR_USER_PREFIX_LEN + 1); + if (!name) + return nfserr_jukebox; + memcpy(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); + + /* + * Copy the extended attribute name over while checking for 0 + * characters. + */ + sp = (char *)p; + dp = name + XATTR_USER_PREFIX_LEN; + cnt = namelen; + + while (cnt-- > 0) { + if (*sp == '\0') + return nfserr_bad_xdr; + *dp++ = *sp++; + } + *dp = '\0'; + + *namep = name; + + return nfs_ok; +} + +/* + * A GETXATTR op request comes without a length specifier. We just set the + * maximum length for the reply based on XATTR_SIZE_MAX and the maximum + * channel reply size. nfsd_getxattr will probe the length of the xattr, + * check it against getxa_len, and allocate + return the value. + */ +static __be32 +nfsd4_decode_getxattr(struct nfsd4_compoundargs *argp, + struct nfsd4_getxattr *getxattr) { - DECODE_HEAD; + __be32 status; + u32 maxcount; - status = nfsd4_decode_stateid(argp, &seek->seek_stateid); + memset(getxattr, 0, sizeof(*getxattr)); + status = nfsd4_decode_xattr_name(argp, &getxattr->getxa_name); if (status) return status; - READ_BUF(8 + 4); - p = xdr_decode_hyper(p, &seek->seek_offset); - seek->seek_whence = be32_to_cpup(p); + maxcount = svc_max_payload(argp->rqstp); + maxcount = min_t(u32, XATTR_SIZE_MAX, maxcount); - DECODE_TAIL; + getxattr->getxa_len = maxcount; + return nfs_ok; +} + +static __be32 +nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp, + struct nfsd4_setxattr *setxattr) +{ + u32 flags, maxcount, size; + __be32 status; + + memset(setxattr, 0, sizeof(*setxattr)); + + if (xdr_stream_decode_u32(argp->xdr, &flags) < 0) + return nfserr_bad_xdr; + + if (flags > SETXATTR4_REPLACE) + return nfserr_inval; + setxattr->setxa_flags = flags; + + status = nfsd4_decode_xattr_name(argp, &setxattr->setxa_name); + if (status) + return status; + + maxcount = svc_max_payload(argp->rqstp); + maxcount = min_t(u32, XATTR_SIZE_MAX, maxcount); + + if (xdr_stream_decode_u32(argp->xdr, &size) < 0) + return nfserr_bad_xdr; + if (size > maxcount) + return nfserr_xattr2big; + + setxattr->setxa_len = size; + if (size > 0) { + struct xdr_buf payload; + + if (!xdr_stream_subsegment(argp->xdr, &payload, size)) + return nfserr_bad_xdr; + status = nfsd4_vbuf_from_vector(argp, &payload, + &setxattr->setxa_buf, size); + } + + return nfs_ok; +} + +static __be32 +nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp, + struct nfsd4_listxattrs *listxattrs) +{ + u32 maxcount; + + memset(listxattrs, 0, sizeof(*listxattrs)); + + if (xdr_stream_decode_u64(argp->xdr, &listxattrs->lsxa_cookie) < 0) + return nfserr_bad_xdr; + + /* + * If the cookie is too large to have even one user.x attribute + * plus trailing '\0' left in a maximum size buffer, it's invalid. + */ + if (listxattrs->lsxa_cookie >= + (XATTR_LIST_MAX / (XATTR_USER_PREFIX_LEN + 2))) + return nfserr_badcookie; + + if (xdr_stream_decode_u32(argp->xdr, &maxcount) < 0) + return nfserr_bad_xdr; + if (maxcount < 8) + /* Always need at least 2 words (length and one character) */ + return nfserr_inval; + + maxcount = min(maxcount, svc_max_payload(argp->rqstp)); + listxattrs->lsxa_maxcount = maxcount; + + return nfs_ok; +} + +static __be32 +nfsd4_decode_removexattr(struct nfsd4_compoundargs *argp, + struct nfsd4_removexattr *removexattr) +{ + memset(removexattr, 0, sizeof(*removexattr)); + return nfsd4_decode_xattr_name(argp, &removexattr->rmxa_name); } static __be32 @@ -1957,10 +2343,15 @@ static const nfsd4_dec nfsd4_dec_ops[] = { [OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp, [OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_offload_status, [OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_offload_status, - [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_read, [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek, [OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp, [OP_CLONE] = (nfsd4_dec)nfsd4_decode_clone, + /* RFC 8276 extended atributes operations */ + [OP_GETXATTR] = (nfsd4_dec)nfsd4_decode_getxattr, + [OP_SETXATTR] = (nfsd4_dec)nfsd4_decode_setxattr, + [OP_LISTXATTRS] = (nfsd4_dec)nfsd4_decode_listxattrs, + [OP_REMOVEXATTR] = (nfsd4_dec)nfsd4_decode_removexattr, }; static inline bool @@ -1977,43 +2368,46 @@ nfsd4_opnum_in_range(struct nfsd4_compoundargs *argp, struct nfsd4_op *op) return true; } -static __be32 +static bool nfsd4_decode_compound(struct nfsd4_compoundargs *argp) { - DECODE_HEAD; struct nfsd4_op *op; bool cachethis = false; int auth_slack= argp->rqstp->rq_auth_slack; int max_reply = auth_slack + 8; /* opcnt, status */ int readcount = 0; int readbytes = 0; + __be32 *p; int i; - READ_BUF(4); - argp->taglen = be32_to_cpup(p++); - READ_BUF(argp->taglen); - SAVEMEM(argp->tag, argp->taglen); - READ_BUF(8); - argp->minorversion = be32_to_cpup(p++); - argp->opcnt = be32_to_cpup(p++); - max_reply += 4 + (XDR_QUADLEN(argp->taglen) << 2); - - if (argp->taglen > NFSD4_MAX_TAGLEN) - goto xdr_error; - /* - * NFS4ERR_RESOURCE is a more helpful error than GARBAGE_ARGS - * here, so we return success at the xdr level so that - * nfsd4_proc can handle this is an NFS-level error. - */ - if (argp->opcnt > NFSD_MAX_OPS_PER_COMPOUND) - return 0; + if (xdr_stream_decode_u32(argp->xdr, &argp->taglen) < 0) + return false; + max_reply += XDR_UNIT; + argp->tag = NULL; + if (unlikely(argp->taglen)) { + if (argp->taglen > NFSD4_MAX_TAGLEN) + return false; + p = xdr_inline_decode(argp->xdr, argp->taglen); + if (!p) + return false; + argp->tag = svcxdr_savemem(argp, p, argp->taglen); + if (!argp->tag) + return false; + max_reply += xdr_align_size(argp->taglen); + } + + if (xdr_stream_decode_u32(argp->xdr, &argp->minorversion) < 0) + return false; + if (xdr_stream_decode_u32(argp->xdr, &argp->client_opcnt) < 0) + return false; + argp->opcnt = min_t(u32, argp->client_opcnt, + NFSD_MAX_OPS_PER_COMPOUND); if (argp->opcnt > ARRAY_SIZE(argp->iops)) { - argp->ops = kzalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL); + argp->ops = vcalloc(argp->opcnt, sizeof(*argp->ops)); if (!argp->ops) { argp->ops = argp->iops; - dprintk("nfsd: couldn't allocate room for COMPOUND\n"); - goto xdr_error; + return false; } } @@ -2024,12 +2418,16 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) op = &argp->ops[i]; op->replay = NULL; - READ_BUF(4); - op->opnum = be32_to_cpup(p++); - - if (nfsd4_opnum_in_range(argp, op)) + if (xdr_stream_decode_u32(argp->xdr, &op->opnum) < 0) + return false; + if (nfsd4_opnum_in_range(argp, op)) { op->status = nfsd4_dec_ops[op->opnum](argp, &op->u); - else { + if (op->status != nfs_ok) + trace_nfsd_compound_decode_err(argp->rqstp, + argp->opcnt, i, + op->opnum, + op->status); + } else { op->opnum = OP_ILLEGAL; op->status = nfserr_op_illegal; } @@ -2040,7 +2438,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) */ cachethis |= nfsd4_cache_this_op(op); - if (op->opnum == OP_READ) { + if (op->opnum == OP_READ || op->opnum == OP_READ_PLUS) { readcount++; readbytes += nfsd4_max_reply(argp->rqstp, op); } else @@ -2066,9 +2464,9 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE; if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack) - clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags); + __clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags); - DECODE_TAIL; + return true; } static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode, @@ -2077,12 +2475,8 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode, if (exp->ex_flags & NFSEXP_V4ROOT) { *p++ = cpu_to_be32(convert_to_wallclock(exp->cd->flush_time)); *p++ = 0; - } else if (IS_I_VERSION(inode)) { + } else p = xdr_encode_hyper(p, nfsd4_change_attribute(stat, inode)); - } else { - *p++ = cpu_to_be32(stat->ctime.tv_sec); - *p++ = cpu_to_be32(stat->ctime.tv_nsec); - } return p; } @@ -2114,15 +2508,8 @@ static __be32 *encode_time_delta(__be32 *p, struct inode *inode) static __be32 *encode_cinfo(__be32 *p, struct nfsd4_change_info *c) { *p++ = cpu_to_be32(c->atomic); - if (c->change_supported) { - p = xdr_encode_hyper(p, c->before_change); - p = xdr_encode_hyper(p, c->after_change); - } else { - *p++ = cpu_to_be32(c->before_ctime_sec); - *p++ = cpu_to_be32(c->before_ctime_nsec); - *p++ = cpu_to_be32(c->after_ctime_sec); - *p++ = cpu_to_be32(c->after_ctime_nsec); - } + p = xdr_encode_hyper(p, c->before_change); + p = xdr_encode_hyper(p, c->after_change); return p; } @@ -2337,7 +2724,7 @@ static u32 nfs4_file_type(umode_t mode) case S_IFREG: return NF4REG; case S_IFSOCK: return NF4SOCK; default: return NF4BAD; - }; + } } static inline __be32 @@ -2421,9 +2808,10 @@ static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *bmval2, u32 } -static int get_parent_attributes(struct svc_export *exp, struct kstat *stat) +static int nfsd4_get_mounted_on_ino(struct svc_export *exp, u64 *pino) { struct path path = exp->ex_path; + struct kstat stat; int err; path_get(&path); @@ -2431,8 +2819,10 @@ static int get_parent_attributes(struct svc_export *exp, struct kstat *stat) if (path.dentry != path.mnt->mnt_root) break; } - err = vfs_getattr(&path, stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); + err = vfs_getattr(&path, &stat, STATX_INO, AT_STATX_SYNC_AS_STAT); path_put(&path); + if (!err) + *pino = stat.ino; return err; } @@ -2485,10 +2875,9 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, struct kstat stat; struct svc_fh *tempfh = NULL; struct kstatfs statfs; - __be32 *p; + __be32 *p, *attrlen_p; int starting_len = xdr->buf->len; int attrlen_offset; - __be32 attrlen; u32 dummy; u64 dummy64; u32 rdattr_err = 0; @@ -2520,6 +2909,9 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, err = vfs_getattr(&path, &stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); if (err) goto out_nfserr; + if (!(stat.result_mask & STATX_BTIME)) + /* underlying FS does not offer btime so we can't share it */ + bmval1 &= ~FATTR4_WORD1_TIME_CREATE; if ((bmval0 & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_MAXNAME)) || (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | @@ -2573,10 +2965,9 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, goto out; attrlen_offset = xdr->buf->len; - p = xdr_reserve_space(xdr, 4); - if (!p) + attrlen_p = xdr_reserve_space(xdr, XDR_UNIT); + if (!attrlen_p) goto out_resource; - p++; /* to be backfilled later */ if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { u32 supp[3]; @@ -2762,7 +3153,7 @@ out_acl: p = xdr_reserve_space(xdr, fhp->fh_handle.fh_size + 4); if (!p) goto out_resource; - p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base, + p = xdr_encode_opaque(p, &fhp->fh_handle.fh_raw, fhp->fh_handle.fh_size); } if (bmval0 & FATTR4_WORD0_FILEID) { @@ -2920,23 +3311,29 @@ out_acl: p = xdr_encode_hyper(p, (s64)stat.mtime.tv_sec); *p++ = cpu_to_be32(stat.mtime.tv_nsec); } + if (bmval1 & FATTR4_WORD1_TIME_CREATE) { + p = xdr_reserve_space(xdr, 12); + if (!p) + goto out_resource; + p = xdr_encode_hyper(p, (s64)stat.btime.tv_sec); + *p++ = cpu_to_be32(stat.btime.tv_nsec); + } if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { - struct kstat parent_stat; u64 ino = stat.ino; p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; /* - * Get parent's attributes if not ignoring crossmount - * and this is the root of a cross-mounted filesystem. + * Get ino of mountpoint in parent filesystem, if not ignoring + * crossmount and this is the root of a cross-mounted + * filesystem. */ if (ignore_crossmnt == 0 && dentry == exp->ex_path.mnt->mnt_root) { - err = get_parent_attributes(exp, &parent_stat); + err = nfsd4_get_mounted_on_ino(exp, &ino); if (err) goto out_nfserr; - ino = parent_stat.ino; } p = xdr_encode_hyper(p, ino); } @@ -2973,16 +3370,6 @@ out_acl: goto out; } - if (bmval2 & FATTR4_WORD2_CHANGE_ATTR_TYPE) { - p = xdr_reserve_space(xdr, 4); - if (!p) - goto out_resource; - if (IS_I_VERSION(d_inode(dentry))) - *p++ = cpu_to_be32(NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR); - else - *p++ = cpu_to_be32(NFS4_CHANGE_TYPE_IS_TIME_METADATA); - } - #ifdef CONFIG_NFSD_V4_SECURITY_LABEL if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { status = nfsd4_encode_security_label(xdr, rqstp, context, @@ -2992,8 +3379,16 @@ out_acl: } #endif - attrlen = htonl(xdr->buf->len - attrlen_offset - 4); - write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, 4); + if (bmval2 & FATTR4_WORD2_XATTR_SUPPORT) { + p = xdr_reserve_space(xdr, 4); + if (!p) + goto out_resource; + err = xattr_supported_namespace(d_inode(dentry), + XATTR_USER_PREFIX); + *p++ = cpu_to_be32(err == 0); + } + + *attrlen_p = cpu_to_be32(xdr->buf->len - attrlen_offset - XDR_UNIT); status = nfs_ok; out: @@ -3162,7 +3557,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, p = xdr_reserve_space(xdr, 3*4 + namlen); if (!p) goto fail; - p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */ + p = xdr_encode_hyper(p, OFFSET_MAX); /* offset of next entry */ p = xdr_encode_array(p, name, namlen); /* name length & name */ nfserr = nfsd4_encode_dirent_fattr(xdr, cd, name, namlen); @@ -3197,15 +3592,18 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, goto fail; cd->rd_maxcount -= entry_bytes; /* - * RFC 3530 14.2.24 describes rd_dircount as only a "hint", so - * let's always let through the first entry, at least: + * RFC 3530 14.2.24 describes rd_dircount as only a "hint", and + * notes that it could be zero. If it is zero, then the server + * should enforce only the rd_maxcount value. */ - if (!cd->rd_dircount) - goto fail; - name_and_cookie = 4 + 4 * XDR_QUADLEN(namlen) + 8; - if (name_and_cookie > cd->rd_dircount && cd->cookie_offset) - goto fail; - cd->rd_dircount -= min(cd->rd_dircount, name_and_cookie); + if (cd->rd_dircount) { + name_and_cookie = 4 + 4 * XDR_QUADLEN(namlen) + 8; + if (name_and_cookie > cd->rd_dircount && cd->cookie_offset) + goto fail; + cd->rd_dircount -= min(cd->rd_dircount, name_and_cookie); + if (!cd->rd_dircount) + cd->rd_maxcount = 0; + } cd->cookie_offset = cookie_offset; skip_entry: @@ -3234,7 +3632,7 @@ nfsd4_encode_stateid(struct xdr_stream *xdr, stateid_t *sid) static __be32 nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 8); @@ -3247,7 +3645,7 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_bind_conn_to_session *bcts) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 8); @@ -3264,7 +3662,7 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, static __be32 nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; return nfsd4_encode_stateid(xdr, &close->cl_stateid); } @@ -3273,7 +3671,7 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c static __be32 nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); @@ -3287,7 +3685,7 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ static __be32 nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 20); @@ -3302,7 +3700,7 @@ static __be32 nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_getattr *getattr) { struct svc_fh *fhp = getattr->ga_fhp; - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; return nfsd4_encode_fattr(xdr, fhp, fhp->fh_export, fhp->fh_dentry, getattr->ga_bmval, resp->rqstp, 0); @@ -3311,7 +3709,7 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 static __be32 nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh **fhpp) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; struct svc_fh *fhp = *fhpp; unsigned int len; __be32 *p; @@ -3320,7 +3718,7 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh p = xdr_reserve_space(xdr, len + 4); if (!p) return nfserr_resource; - p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base, len); + p = xdr_encode_opaque(p, &fhp->fh_handle.fh_raw, len); return 0; } @@ -3366,7 +3764,7 @@ again: static __be32 nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lock *lock) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; if (!nfserr) nfserr = nfsd4_encode_stateid(xdr, &lock->lk_resp_stateid); @@ -3379,7 +3777,7 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo static __be32 nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lockt *lockt) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; if (nfserr == nfserr_denied) nfsd4_encode_lock_denied(xdr, &lockt->lt_denied); @@ -3389,7 +3787,7 @@ nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l static __be32 nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; return nfsd4_encode_stateid(xdr, &locku->lu_stateid); } @@ -3398,7 +3796,7 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l static __be32 nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 20); @@ -3412,7 +3810,7 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li static __be32 nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; nfserr = nfsd4_encode_stateid(xdr, &open->op_stateid); @@ -3506,7 +3904,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op static __be32 nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; return nfsd4_encode_stateid(xdr, &oc->oc_resp_stateid); } @@ -3514,7 +3912,7 @@ nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct static __be32 nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; return nfsd4_encode_stateid(xdr, &od->od_stateid); } @@ -3524,33 +3922,28 @@ static __be32 nfsd4_encode_splice_read( struct nfsd4_read *read, struct file *file, unsigned long maxcount) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; struct xdr_buf *buf = xdr->buf; - u32 eof; - int space_left; + int status, space_left; __be32 nfserr; - __be32 *p = xdr->p - 2; /* Make sure there will be room for padding if needed */ if (xdr->end - xdr->p < 1) return nfserr_resource; nfserr = nfsd_splice_read(read->rd_rqstp, read->rd_fhp, - file, read->rd_offset, &maxcount, &eof); + file, read->rd_offset, &maxcount, + &read->rd_eof); read->rd_length = maxcount; - if (nfserr) { - /* - * nfsd_splice_actor may have already messed with the - * page length; reset it so as not to confuse - * xdr_truncate_encode: - */ - buf->page_len = 0; - return nfserr; + if (nfserr) + goto out_err; + status = svc_encode_result_payload(read->rd_rqstp, + buf->head[0].iov_len, maxcount); + if (status) { + nfserr = nfserrno(status); + goto out_err; } - *(p++) = htonl(eof); - *(p++) = htonl(maxcount); - buf->page_len = maxcount; buf->len += maxcount; xdr->page_ptr += (buf->page_base + maxcount + PAGE_SIZE - 1) @@ -3576,73 +3969,52 @@ static __be32 nfsd4_encode_splice_read( xdr->end = (__be32 *)((void *)xdr->end + space_left); return 0; + +out_err: + /* + * nfsd_splice_actor may have already messed with the + * page length; reset it so as not to confuse + * xdr_truncate_encode in our caller. + */ + buf->page_len = 0; + return nfserr; } static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, struct nfsd4_read *read, struct file *file, unsigned long maxcount) { - struct xdr_stream *xdr = &resp->xdr; - u32 eof; - int v; - int starting_len = xdr->buf->len - 8; - long len; - int thislen; + struct xdr_stream *xdr = resp->xdr; + unsigned int starting_len = xdr->buf->len; + __be32 zero = xdr_zero; __be32 nfserr; - __be32 tmp; - __be32 *p; - u32 zzz = 0; - int pad; - - len = maxcount; - v = 0; - - thislen = min_t(long, len, ((void *)xdr->end - (void *)xdr->p)); - p = xdr_reserve_space(xdr, (thislen+3)&~3); - WARN_ON_ONCE(!p); - resp->rqstp->rq_vec[v].iov_base = p; - resp->rqstp->rq_vec[v].iov_len = thislen; - v++; - len -= thislen; - - while (len) { - thislen = min_t(long, len, PAGE_SIZE); - p = xdr_reserve_space(xdr, (thislen+3)&~3); - WARN_ON_ONCE(!p); - resp->rqstp->rq_vec[v].iov_base = p; - resp->rqstp->rq_vec[v].iov_len = thislen; - v++; - len -= thislen; - } - read->rd_vlen = v; - - len = maxcount; + + read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, maxcount); + if (read->rd_vlen < 0) + return nfserr_resource; + nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen, &maxcount, - &eof); + &read->rd_eof); read->rd_length = maxcount; if (nfserr) return nfserr; - xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3)); - - tmp = htonl(eof); - write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4); - tmp = htonl(maxcount); - write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); - - pad = (maxcount&3) ? 4 - (maxcount&3) : 0; - write_bytes_to_xdr_buf(xdr->buf, starting_len + 8 + maxcount, - &zzz, pad); - return 0; + if (svc_encode_result_payload(resp->rqstp, starting_len, maxcount)) + return nfserr_io; + xdr_truncate_encode(xdr, starting_len + xdr_align_size(maxcount)); + write_bytes_to_xdr_buf(xdr->buf, starting_len + maxcount, &zero, + xdr_pad_size(maxcount)); + return nfs_ok; } static __be32 nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_read *read) { + bool splice_ok = test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags); unsigned long maxcount; - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; struct file *file; int starting_len = xdr->buf->len; __be32 *p; @@ -3653,45 +4025,42 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */ if (!p) { - WARN_ON_ONCE(test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)); + WARN_ON_ONCE(splice_ok); return nfserr_resource; } - if (resp->xdr.buf->page_len && - test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) { + if (resp->xdr->buf->page_len && splice_ok) { WARN_ON_ONCE(1); - return nfserr_resource; + return nfserr_serverfault; } xdr_commit_encode(xdr); - maxcount = svc_max_payload(resp->rqstp); - maxcount = min_t(unsigned long, maxcount, + maxcount = min_t(unsigned long, read->rd_length, (xdr->buf->buflen - xdr->buf->len)); - maxcount = min_t(unsigned long, maxcount, read->rd_length); - if (file->f_op->splice_read && - test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) + if (file->f_op->splice_read && splice_ok) nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); else nfserr = nfsd4_encode_readv(resp, read, file, maxcount); - - if (nfserr) + if (nfserr) { xdr_truncate_encode(xdr, starting_len); + return nfserr; + } - return nfserr; + p = xdr_encode_bool(p, read->rd_eof); + *p = cpu_to_be32(read->rd_length); + return nfs_ok; } static __be32 nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readlink *readlink) { - int maxcount; - __be32 wire_count; - int zero = 0; - struct xdr_stream *xdr = &resp->xdr; + __be32 *p, *maxcount_p, zero = xdr_zero; + struct xdr_stream *xdr = resp->xdr; int length_offset = xdr->buf->len; - __be32 *p; + int maxcount, status; - p = xdr_reserve_space(xdr, 4); - if (!p) + maxcount_p = xdr_reserve_space(xdr, XDR_UNIT); + if (!maxcount_p) return nfserr_resource; maxcount = PAGE_SIZE; @@ -3708,18 +4077,23 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd (char *)p, &maxcount); if (nfserr == nfserr_isdir) nfserr = nfserr_inval; - if (nfserr) { - xdr_truncate_encode(xdr, length_offset); - return nfserr; - } + if (nfserr) + goto out_err; + status = svc_encode_result_payload(readlink->rl_rqstp, length_offset, + maxcount); + if (status) { + nfserr = nfserrno(status); + goto out_err; + } + *maxcount_p = cpu_to_be32(maxcount); + xdr_truncate_encode(xdr, length_offset + 4 + xdr_align_size(maxcount)); + write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount, &zero, + xdr_pad_size(maxcount)); + return nfs_ok; - wire_count = htonl(maxcount); - write_bytes_to_xdr_buf(xdr->buf, length_offset, &wire_count, 4); - xdr_truncate_encode(xdr, length_offset + 4 + ALIGN(maxcount, 4)); - if (maxcount & 3) - write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount, - &zero, 4 - (maxcount&3)); - return 0; +out_err: + xdr_truncate_encode(xdr, length_offset); + return nfserr; } static __be32 @@ -3729,7 +4103,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 int bytes_left; loff_t offset; __be64 wire_offset; - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; int starting_len = xdr->buf->len; __be32 *p; @@ -3740,8 +4114,8 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */ *p++ = cpu_to_be32(0); *p++ = cpu_to_be32(0); - resp->xdr.buf->head[0].iov_len = ((char *)resp->xdr.p) - - (char *)resp->xdr.buf->head[0].iov_base; + xdr->buf->head[0].iov_len = (char *)xdr->p - + (char *)xdr->buf->head[0].iov_base; /* * Number of bytes left for directory entries allowing for the @@ -3816,7 +4190,7 @@ err_no_verf: static __be32 nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 20); @@ -3829,7 +4203,7 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ static __be32 nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 40); @@ -3912,7 +4286,7 @@ static __be32 nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_secinfo *secinfo) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; return nfsd4_do_encode_secinfo(xdr, secinfo->si_exp); } @@ -3921,7 +4295,7 @@ static __be32 nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_secinfo_no_name *secinfo) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; return nfsd4_do_encode_secinfo(xdr, secinfo->sin_exp); } @@ -3933,7 +4307,7 @@ nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, static __be32 nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 16); @@ -3957,7 +4331,7 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 static __be32 nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; if (!nfserr) { @@ -3981,7 +4355,7 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n static __be32 nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 16); @@ -3998,18 +4372,19 @@ static __be32 nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_exchange_id *exid) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; char *major_id; char *server_scope; int major_id_sz; int server_scope_sz; uint64_t minor_id = 0; + struct nfsd_net *nn = net_generic(SVC_NET(resp->rqstp), nfsd_net_id); - major_id = utsname()->nodename; - major_id_sz = strlen(major_id); - server_scope = utsname()->nodename; - server_scope_sz = strlen(server_scope); + major_id = nn->nfsd_name; + major_id_sz = strlen(nn->nfsd_name); + server_scope = nn->nfsd_name; + server_scope_sz = strlen(nn->nfsd_name); p = xdr_reserve_space(xdr, 8 /* eir_clientid */ + @@ -4075,7 +4450,7 @@ static __be32 nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create_session *sess) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 24); @@ -4128,7 +4503,7 @@ static __be32 nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_sequence *seq) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 20); @@ -4151,7 +4526,7 @@ static __be32 nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_test_stateid *test_stateid) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; struct nfsd4_test_stateid_id *stateid, *next; __be32 *p; @@ -4172,7 +4547,7 @@ static __be32 nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_getdeviceinfo *gdev) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; const struct nfsd4_layout_ops *ops; u32 starting_len = xdr->buf->len, needed_len; __be32 *p; @@ -4228,7 +4603,7 @@ static __be32 nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_layoutget *lgp) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; const struct nfsd4_layout_ops *ops; __be32 *p; @@ -4255,7 +4630,7 @@ static __be32 nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_layoutcommit *lcp) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 4); @@ -4276,7 +4651,7 @@ static __be32 nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_layoutreturn *lrp) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 4); @@ -4294,7 +4669,7 @@ nfsd42_encode_write_res(struct nfsd4_compoundres *resp, struct nfsd42_write_res *write, bool sync) { __be32 *p; - p = xdr_reserve_space(&resp->xdr, 4); + p = xdr_reserve_space(resp->xdr, 4); if (!p) return nfserr_resource; @@ -4303,11 +4678,11 @@ nfsd42_encode_write_res(struct nfsd4_compoundres *resp, else { __be32 nfserr; *p++ = cpu_to_be32(1); - nfserr = nfsd4_encode_stateid(&resp->xdr, &write->cb_stateid); + nfserr = nfsd4_encode_stateid(resp->xdr, &write->cb_stateid); if (nfserr) return nfserr; } - p = xdr_reserve_space(&resp->xdr, 8 + 4 + NFS4_VERIFIER_SIZE); + p = xdr_reserve_space(resp->xdr, 8 + 4 + NFS4_VERIFIER_SIZE); if (!p) return nfserr_resource; @@ -4321,7 +4696,7 @@ nfsd42_encode_write_res(struct nfsd4_compoundres *resp, static __be32 nfsd42_encode_nl4_server(struct nfsd4_compoundres *resp, struct nl4_server *ns) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; struct nfs42_netaddr *addr; __be32 *p; @@ -4365,13 +4740,13 @@ nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr, __be32 *p; nfserr = nfsd42_encode_write_res(resp, ©->cp_res, - copy->cp_synchronous); + nfsd4_copy_is_sync(copy)); if (nfserr) return nfserr; - p = xdr_reserve_space(&resp->xdr, 4 + 4); + p = xdr_reserve_space(resp->xdr, 4 + 4); *p++ = xdr_one; /* cr_consecutive */ - *p++ = cpu_to_be32(copy->cp_synchronous); + *p = nfsd4_copy_is_sync(copy) ? xdr_one : xdr_zero; return 0; } @@ -4379,7 +4754,7 @@ static __be32 nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_offload_status *os) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; p = xdr_reserve_space(xdr, 8 + 4); @@ -4387,6 +4762,153 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr, return nfserr_resource; p = xdr_encode_hyper(p, os->count); *p++ = cpu_to_be32(0); + return nfserr; +} + +static __be32 +nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, + struct nfsd4_read *read, + unsigned long *maxcount, u32 *eof, + loff_t *pos) +{ + struct xdr_stream *xdr = resp->xdr; + struct file *file = read->rd_nf->nf_file; + int starting_len = xdr->buf->len; + loff_t hole_pos; + __be32 nfserr; + __be32 *p, tmp; + __be64 tmp64; + + hole_pos = pos ? *pos : vfs_llseek(file, read->rd_offset, SEEK_HOLE); + if (hole_pos > read->rd_offset) + *maxcount = min_t(unsigned long, *maxcount, hole_pos - read->rd_offset); + *maxcount = min_t(unsigned long, *maxcount, (xdr->buf->buflen - xdr->buf->len)); + + /* Content type, offset, byte count */ + p = xdr_reserve_space(xdr, 4 + 8 + 4); + if (!p) + return nfserr_resource; + + read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, *maxcount); + if (read->rd_vlen < 0) + return nfserr_resource; + + nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset, + resp->rqstp->rq_vec, read->rd_vlen, maxcount, eof); + if (nfserr) + return nfserr; + xdr_truncate_encode(xdr, starting_len + 16 + xdr_align_size(*maxcount)); + + tmp = htonl(NFS4_CONTENT_DATA); + write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); + tmp64 = cpu_to_be64(read->rd_offset); + write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp64, 8); + tmp = htonl(*maxcount); + write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp, 4); + + tmp = xdr_zero; + write_bytes_to_xdr_buf(xdr->buf, starting_len + 16 + *maxcount, &tmp, + xdr_pad_size(*maxcount)); + return nfs_ok; +} + +static __be32 +nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp, + struct nfsd4_read *read, + unsigned long *maxcount, u32 *eof) +{ + struct file *file = read->rd_nf->nf_file; + loff_t data_pos = vfs_llseek(file, read->rd_offset, SEEK_DATA); + loff_t f_size = i_size_read(file_inode(file)); + unsigned long count; + __be32 *p; + + if (data_pos == -ENXIO) + data_pos = f_size; + else if (data_pos <= read->rd_offset || (data_pos < f_size && data_pos % PAGE_SIZE)) + return nfsd4_encode_read_plus_data(resp, read, maxcount, eof, &f_size); + count = data_pos - read->rd_offset; + + /* Content type, offset, byte count */ + p = xdr_reserve_space(resp->xdr, 4 + 8 + 8); + if (!p) + return nfserr_resource; + + *p++ = htonl(NFS4_CONTENT_HOLE); + p = xdr_encode_hyper(p, read->rd_offset); + p = xdr_encode_hyper(p, count); + + *eof = (read->rd_offset + count) >= f_size; + *maxcount = min_t(unsigned long, count, *maxcount); + return nfs_ok; +} + +static __be32 +nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr, + struct nfsd4_read *read) +{ + unsigned long maxcount, count; + struct xdr_stream *xdr = resp->xdr; + struct file *file; + int starting_len = xdr->buf->len; + int last_segment = xdr->buf->len; + int segments = 0; + __be32 *p, tmp; + bool is_data; + loff_t pos; + u32 eof; + + if (nfserr) + return nfserr; + file = read->rd_nf->nf_file; + + /* eof flag, segment count */ + p = xdr_reserve_space(xdr, 4 + 4); + if (!p) + return nfserr_resource; + xdr_commit_encode(xdr); + + maxcount = min_t(unsigned long, read->rd_length, + (xdr->buf->buflen - xdr->buf->len)); + count = maxcount; + + eof = read->rd_offset >= i_size_read(file_inode(file)); + if (eof) + goto out; + + pos = vfs_llseek(file, read->rd_offset, SEEK_HOLE); + is_data = pos > read->rd_offset; + + while (count > 0 && !eof) { + maxcount = count; + if (is_data) + nfserr = nfsd4_encode_read_plus_data(resp, read, &maxcount, &eof, + segments == 0 ? &pos : NULL); + else + nfserr = nfsd4_encode_read_plus_hole(resp, read, &maxcount, &eof); + if (nfserr) + goto out; + count -= maxcount; + read->rd_offset += maxcount; + is_data = !is_data; + last_segment = xdr->buf->len; + segments++; + } + +out: + if (nfserr && segments == 0) + xdr_truncate_encode(xdr, starting_len); + else { + if (nfserr) { + xdr_truncate_encode(xdr, last_segment); + nfserr = nfs_ok; + eof = 0; + } + tmp = htonl(eof); + write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4); + tmp = htonl(segments); + write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4); + } return nfserr; } @@ -4395,7 +4917,7 @@ static __be32 nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_copy_notify *cn) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; __be32 *p; if (nfserr) @@ -4422,7 +4944,8 @@ nfsd4_encode_copy_notify(struct nfsd4_compoundres *resp, __be32 nfserr, *p++ = cpu_to_be32(1); - return nfsd42_encode_nl4_server(resp, &cn->cpn_src); + nfserr = nfsd42_encode_nl4_server(resp, cn->cpn_src); + return nfserr; } static __be32 @@ -4431,7 +4954,7 @@ nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, { __be32 *p; - p = xdr_reserve_space(&resp->xdr, 4 + 8); + p = xdr_reserve_space(resp->xdr, 4 + 8); *p++ = cpu_to_be32(seek->seek_eof); p = xdr_encode_hyper(p, seek->seek_pos); @@ -4444,6 +4967,241 @@ nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p) return nfserr; } +/* + * Encode kmalloc-ed buffer in to XDR stream. + */ +static __be32 +nfsd4_vbuf_to_stream(struct xdr_stream *xdr, char *buf, u32 buflen) +{ + u32 cplen; + __be32 *p; + + cplen = min_t(unsigned long, buflen, + ((void *)xdr->end - (void *)xdr->p)); + p = xdr_reserve_space(xdr, cplen); + if (!p) + return nfserr_resource; + + memcpy(p, buf, cplen); + buf += cplen; + buflen -= cplen; + + while (buflen) { + cplen = min_t(u32, buflen, PAGE_SIZE); + p = xdr_reserve_space(xdr, cplen); + if (!p) + return nfserr_resource; + + memcpy(p, buf, cplen); + + if (cplen < PAGE_SIZE) { + /* + * We're done, with a length that wasn't page + * aligned, so possibly not word aligned. Pad + * any trailing bytes with 0. + */ + xdr_encode_opaque_fixed(p, NULL, cplen); + break; + } + + buflen -= PAGE_SIZE; + buf += PAGE_SIZE; + } + + return 0; +} + +static __be32 +nfsd4_encode_getxattr(struct nfsd4_compoundres *resp, __be32 nfserr, + struct nfsd4_getxattr *getxattr) +{ + struct xdr_stream *xdr = resp->xdr; + __be32 *p, err; + + p = xdr_reserve_space(xdr, 4); + if (!p) + return nfserr_resource; + + *p = cpu_to_be32(getxattr->getxa_len); + + if (getxattr->getxa_len == 0) + return 0; + + err = nfsd4_vbuf_to_stream(xdr, getxattr->getxa_buf, + getxattr->getxa_len); + + kvfree(getxattr->getxa_buf); + + return err; +} + +static __be32 +nfsd4_encode_setxattr(struct nfsd4_compoundres *resp, __be32 nfserr, + struct nfsd4_setxattr *setxattr) +{ + struct xdr_stream *xdr = resp->xdr; + __be32 *p; + + p = xdr_reserve_space(xdr, 20); + if (!p) + return nfserr_resource; + + encode_cinfo(p, &setxattr->setxa_cinfo); + + return 0; +} + +/* + * See if there are cookie values that can be rejected outright. + */ +static __be32 +nfsd4_listxattr_validate_cookie(struct nfsd4_listxattrs *listxattrs, + u32 *offsetp) +{ + u64 cookie = listxattrs->lsxa_cookie; + + /* + * If the cookie is larger than the maximum number we can fit + * in either the buffer we just got back from vfs_listxattr, or, + * XDR-encoded, in the return buffer, it's invalid. + */ + if (cookie > (listxattrs->lsxa_len) / (XATTR_USER_PREFIX_LEN + 2)) + return nfserr_badcookie; + + if (cookie > (listxattrs->lsxa_maxcount / + (XDR_QUADLEN(XATTR_USER_PREFIX_LEN + 2) + 4))) + return nfserr_badcookie; + + *offsetp = (u32)cookie; + return 0; +} + +static __be32 +nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr, + struct nfsd4_listxattrs *listxattrs) +{ + struct xdr_stream *xdr = resp->xdr; + u32 cookie_offset, count_offset, eof; + u32 left, xdrleft, slen, count; + u32 xdrlen, offset; + u64 cookie; + char *sp; + __be32 status, tmp; + __be32 *p; + u32 nuser; + + eof = 1; + + status = nfsd4_listxattr_validate_cookie(listxattrs, &offset); + if (status) + goto out; + + /* + * Reserve space for the cookie and the name array count. Record + * the offsets to save them later. + */ + cookie_offset = xdr->buf->len; + count_offset = cookie_offset + 8; + p = xdr_reserve_space(xdr, 12); + if (!p) { + status = nfserr_resource; + goto out; + } + + count = 0; + left = listxattrs->lsxa_len; + sp = listxattrs->lsxa_buf; + nuser = 0; + + xdrleft = listxattrs->lsxa_maxcount; + + while (left > 0 && xdrleft > 0) { + slen = strlen(sp); + + /* + * Check if this is a "user." attribute, skip it if not. + */ + if (strncmp(sp, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) + goto contloop; + + slen -= XATTR_USER_PREFIX_LEN; + xdrlen = 4 + ((slen + 3) & ~3); + if (xdrlen > xdrleft) { + if (count == 0) { + /* + * Can't even fit the first attribute name. + */ + status = nfserr_toosmall; + goto out; + } + eof = 0; + goto wreof; + } + + left -= XATTR_USER_PREFIX_LEN; + sp += XATTR_USER_PREFIX_LEN; + if (nuser++ < offset) + goto contloop; + + + p = xdr_reserve_space(xdr, xdrlen); + if (!p) { + status = nfserr_resource; + goto out; + } + + xdr_encode_opaque(p, sp, slen); + + xdrleft -= xdrlen; + count++; +contloop: + sp += slen + 1; + left -= slen + 1; + } + + /* + * If there were user attributes to copy, but we didn't copy + * any, the offset was too large (e.g. the cookie was invalid). + */ + if (nuser > 0 && count == 0) { + status = nfserr_badcookie; + goto out; + } + +wreof: + p = xdr_reserve_space(xdr, 4); + if (!p) { + status = nfserr_resource; + goto out; + } + *p = cpu_to_be32(eof); + + cookie = offset + count; + + write_bytes_to_xdr_buf(xdr->buf, cookie_offset, &cookie, 8); + tmp = cpu_to_be32(count); + write_bytes_to_xdr_buf(xdr->buf, count_offset, &tmp, 4); +out: + if (listxattrs->lsxa_len) + kvfree(listxattrs->lsxa_buf); + return status; +} + +static __be32 +nfsd4_encode_removexattr(struct nfsd4_compoundres *resp, __be32 nfserr, + struct nfsd4_removexattr *removexattr) +{ + struct xdr_stream *xdr = resp->xdr; + __be32 *p; + + p = xdr_reserve_space(xdr, 20); + if (!p) + return nfserr_resource; + + p = encode_cinfo(p, &removexattr->rmxa_cinfo); + return 0; +} + typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *); /* @@ -4529,10 +5287,16 @@ static const nfsd4_enc nfsd4_enc_ops[] = { [OP_LAYOUTSTATS] = (nfsd4_enc)nfsd4_encode_noop, [OP_OFFLOAD_CANCEL] = (nfsd4_enc)nfsd4_encode_noop, [OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_offload_status, - [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_noop, + [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_read_plus, [OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek, [OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop, [OP_CLONE] = (nfsd4_enc)nfsd4_encode_noop, + + /* RFC 8276 extended atributes operations */ + [OP_GETXATTR] = (nfsd4_enc)nfsd4_encode_getxattr, + [OP_SETXATTR] = (nfsd4_enc)nfsd4_encode_setxattr, + [OP_LISTXATTRS] = (nfsd4_enc)nfsd4_encode_listxattrs, + [OP_REMOVEXATTR] = (nfsd4_enc)nfsd4_encode_removexattr, }; /* @@ -4567,7 +5331,7 @@ __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 respsize) void nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) { - struct xdr_stream *xdr = &resp->xdr; + struct xdr_stream *xdr = resp->xdr; struct nfs4_stateowner *so = resp->cstate.replay_owner; struct svc_rqst *rqstp = resp->rqstp; const struct nfsd4_operation *opdesc = op->opdesc; @@ -4588,10 +5352,12 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) if (op->status && opdesc && !(opdesc->op_flags & OP_NONTRIVIAL_ERROR_ENCODE)) goto status; - BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) || + BUG_ON(op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) || !nfsd4_enc_ops[op->opnum]); encoder = nfsd4_enc_ops[op->opnum]; op->status = encoder(resp, op->status, &op->u); + if (op->status) + trace_nfsd_compound_encode_err(rqstp, op->opnum, op->status); if (opdesc && opdesc->op_release) opdesc->op_release(&op->u); xdr_commit_encode(xdr); @@ -4633,8 +5399,7 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) so->so_replay.rp_buf, len); } status: - /* Note that op->status is already in network byte order: */ - write_bytes_to_xdr_buf(xdr->buf, post_err_offset - 4, &op->status, 4); + *p = op->status; } /* @@ -4660,22 +5425,14 @@ nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op) p = xdr_encode_opaque_fixed(p, rp->rp_buf, rp->rp_buflen); } -int -nfs4svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p) -{ - return xdr_ressize_check(rqstp, p); -} - void nfsd4_release_compoundargs(struct svc_rqst *rqstp) { struct nfsd4_compoundargs *args = rqstp->rq_argp; if (args->ops != args->iops) { - kfree(args->ops); + vfree(args->ops); args->ops = args->iops; } - kfree(args->tmpp); - args->tmpp = NULL; while (args->to_free) { struct svcxdr_tmpbuf *tb = args->to_free; args->to_free = tb->next; @@ -4683,56 +5440,42 @@ void nfsd4_release_compoundargs(struct svc_rqst *rqstp) } } -int -nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd4_compoundargs *args = rqstp->rq_argp; - if (rqstp->rq_arg.head[0].iov_len % 4) { - /* client is nuts */ - dprintk("%s: compound not properly padded! (peeraddr=%pISc xid=0x%x)", - __func__, svc_addr(rqstp), be32_to_cpu(rqstp->rq_xid)); - return 0; - } - args->p = p; - args->end = rqstp->rq_arg.head[0].iov_base + rqstp->rq_arg.head[0].iov_len; - args->pagelist = rqstp->rq_arg.pages; - args->pagelen = rqstp->rq_arg.page_len; - args->tail = false; - args->tmpp = NULL; + /* svcxdr_tmp_alloc */ args->to_free = NULL; + + args->xdr = xdr; args->ops = args->iops; args->rqstp = rqstp; - return !nfsd4_decode_compound(args); + return nfsd4_decode_compound(args); } -int -nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p) +bool +nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { + struct nfsd4_compoundres *resp = rqstp->rq_resp; + __be32 *p; + /* - * All that remains is to write the tag and operation count... + * Send buffer space for the following items is reserved + * at the top of nfsd4_proc_compound(). */ - struct nfsd4_compoundres *resp = rqstp->rq_resp; - struct xdr_buf *buf = resp->xdr.buf; + p = resp->statusp; - WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len + - buf->tail[0].iov_len); + *p++ = resp->cstate.status; - rqstp->rq_next_page = resp->xdr.page_ptr + 1; + rqstp->rq_next_page = xdr->page_ptr + 1; - p = resp->tagp; *p++ = htonl(resp->taglen); memcpy(p, resp->tag, resp->taglen); p += XDR_QUADLEN(resp->taglen); *p++ = htonl(resp->opcnt); nfsd4_sequence_done(resp); - return 1; + return true; } - -/* - * Local variables: - * c-basic-offset: 8 - * End: - */ diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 96352ab7bd81..3e64a3d50a1c 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -20,8 +20,7 @@ #include "nfsd.h" #include "cache.h" - -#define NFSDDBG_FACILITY NFSDDBG_REPCACHE +#include "trace.h" /* * We use this value to determine the number of hash buckets from the max @@ -36,6 +35,8 @@ struct nfsd_drc_bucket { spinlock_t cache_lock; }; +static struct kmem_cache *drc_slab; + static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); static unsigned long nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc); @@ -83,19 +84,13 @@ nfsd_hashsize(unsigned int limit) return roundup_pow_of_two(limit / TARGET_BUCKET_SIZE); } -static u32 -nfsd_cache_hash(__be32 xid, struct nfsd_net *nn) -{ - return hash_32(be32_to_cpu(xid), nn->maskbits); -} - static struct svc_cacherep * nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum, struct nfsd_net *nn) { struct svc_cacherep *rp; - rp = kmem_cache_alloc(nn->drc_slab, GFP_KERNEL); + rp = kmem_cache_alloc(drc_slab, GFP_KERNEL); if (rp) { rp->c_state = RC_UNUSED; rp->c_type = RC_NOCACHE; @@ -120,16 +115,16 @@ nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct svc_cacherep *rp, struct nfsd_net *nn) { if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) { - nn->drc_mem_usage -= rp->c_replvec.iov_len; + nfsd_stats_drc_mem_usage_sub(nn, rp->c_replvec.iov_len); kfree(rp->c_replvec.iov_base); } if (rp->c_state != RC_UNUSED) { rb_erase(&rp->c_node, &b->rb_head); list_del(&rp->c_lru); atomic_dec(&nn->num_drc_entries); - nn->drc_mem_usage -= sizeof(*rp); + nfsd_stats_drc_mem_usage_sub(nn, sizeof(*rp)); } - kmem_cache_free(nn->drc_slab, rp); + kmem_cache_free(drc_slab, rp); } static void @@ -141,6 +136,28 @@ nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp, spin_unlock(&b->cache_lock); } +int nfsd_drc_slab_create(void) +{ + drc_slab = kmem_cache_create("nfsd_drc", + sizeof(struct svc_cacherep), 0, 0, NULL); + return drc_slab ? 0: -ENOMEM; +} + +void nfsd_drc_slab_free(void) +{ + kmem_cache_destroy(drc_slab); +} + +static int nfsd_reply_cache_stats_init(struct nfsd_net *nn) +{ + return nfsd_percpu_counters_init(nn->counter, NFSD_NET_COUNTERS_NUM); +} + +static void nfsd_reply_cache_stats_destroy(struct nfsd_net *nn) +{ + nfsd_percpu_counters_destroy(nn->counter, NFSD_NET_COUNTERS_NUM); +} + int nfsd_reply_cache_init(struct nfsd_net *nn) { unsigned int hashsize; @@ -152,27 +169,23 @@ int nfsd_reply_cache_init(struct nfsd_net *nn) hashsize = nfsd_hashsize(nn->max_drc_entries); nn->maskbits = ilog2(hashsize); + status = nfsd_reply_cache_stats_init(nn); + if (status) + goto out_nomem; + nn->nfsd_reply_cache_shrinker.scan_objects = nfsd_reply_cache_scan; nn->nfsd_reply_cache_shrinker.count_objects = nfsd_reply_cache_count; nn->nfsd_reply_cache_shrinker.seeks = 1; - status = register_shrinker(&nn->nfsd_reply_cache_shrinker); + status = register_shrinker(&nn->nfsd_reply_cache_shrinker, + "nfsd-reply:%s", nn->nfsd_name); if (status) - goto out_nomem; + goto out_stats_destroy; - nn->drc_slab = kmem_cache_create("nfsd_drc", - sizeof(struct svc_cacherep), 0, 0, NULL); - if (!nn->drc_slab) + nn->drc_hashtbl = kvzalloc(array_size(hashsize, + sizeof(*nn->drc_hashtbl)), GFP_KERNEL); + if (!nn->drc_hashtbl) goto out_shrinker; - nn->drc_hashtbl = kcalloc(hashsize, - sizeof(*nn->drc_hashtbl), GFP_KERNEL); - if (!nn->drc_hashtbl) { - nn->drc_hashtbl = vzalloc(array_size(hashsize, - sizeof(*nn->drc_hashtbl))); - if (!nn->drc_hashtbl) - goto out_slab; - } - for (i = 0; i < hashsize; i++) { INIT_LIST_HEAD(&nn->drc_hashtbl[i].lru_head); spin_lock_init(&nn->drc_hashtbl[i].cache_lock); @@ -180,10 +193,10 @@ int nfsd_reply_cache_init(struct nfsd_net *nn) nn->drc_hashsize = hashsize; return 0; -out_slab: - kmem_cache_destroy(nn->drc_slab); out_shrinker: unregister_shrinker(&nn->nfsd_reply_cache_shrinker); +out_stats_destroy: + nfsd_reply_cache_stats_destroy(nn); out_nomem: printk(KERN_ERR "nfsd: failed to allocate reply cache\n"); return -ENOMEM; @@ -204,13 +217,12 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn) rp, nn); } } + nfsd_reply_cache_stats_destroy(nn); kvfree(nn->drc_hashtbl); nn->drc_hashtbl = NULL; nn->drc_hashsize = 0; - kmem_cache_destroy(nn->drc_slab); - nn->drc_slab = NULL; } /* @@ -224,8 +236,16 @@ lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp) list_move_tail(&rp->c_lru, &b->lru_head); } -static long -prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn) +static noinline struct nfsd_drc_bucket * +nfsd_cache_bucket_find(__be32 xid, struct nfsd_net *nn) +{ + unsigned int hash = hash_32((__force u32)xid, nn->maskbits); + + return &nn->drc_hashtbl[hash]; +} + +static long prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn, + unsigned int max) { struct svc_cacherep *rp, *tmp; long freed = 0; @@ -241,11 +261,17 @@ prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn) time_before(jiffies, rp->c_timestamp + RC_EXPIRE)) break; nfsd_reply_cache_free_locked(b, rp, nn); - freed++; + if (max && freed++ > max) + break; } return freed; } +static long nfsd_prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn) +{ + return prune_bucket(b, nn, 3); +} + /* * Walk the LRU list and prune off entries that are older than RC_EXPIRE. * Also prune the oldest ones when the total exceeds the max number of entries. @@ -262,7 +288,7 @@ prune_cache_entries(struct nfsd_net *nn) if (list_empty(&b->lru_head)) continue; spin_lock(&b->cache_lock); - freed += prune_bucket(b, nn); + freed += prune_bucket(b, nn, 0); spin_unlock(&b->cache_lock); } return freed; @@ -323,8 +349,10 @@ nfsd_cache_key_cmp(const struct svc_cacherep *key, const struct svc_cacherep *rp, struct nfsd_net *nn) { if (key->c_key.k_xid == rp->c_key.k_xid && - key->c_key.k_csum != rp->c_key.k_csum) - ++nn->payload_misses; + key->c_key.k_csum != rp->c_key.k_csum) { + nfsd_stats_payload_misses_inc(nn); + trace_nfsd_drc_mismatch(nn, key, rp); + } return memcmp(&key->c_key, &rp->c_key, sizeof(key->c_key)); } @@ -377,29 +405,34 @@ out: return ret; } -/* +/** + * nfsd_cache_lookup - Find an entry in the duplicate reply cache + * @rqstp: Incoming Call to find + * * Try to find an entry matching the current call in the cache. When none * is found, we try to grab the oldest expired entry off the LRU list. If * a suitable one isn't there, then drop the cache_lock and allocate a * new one, then search again in case one got inserted while this thread * didn't hold the lock. + * + * Return values: + * %RC_DOIT: Process the request normally + * %RC_REPLY: Reply from cache + * %RC_DROPIT: Do not process the request further */ -int -nfsd_cache_lookup(struct svc_rqst *rqstp) +int nfsd_cache_lookup(struct svc_rqst *rqstp) { - struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + struct nfsd_net *nn; struct svc_cacherep *rp, *found; - __be32 xid = rqstp->rq_xid; __wsum csum; - u32 hash = nfsd_cache_hash(xid, nn); - struct nfsd_drc_bucket *b = &nn->drc_hashtbl[hash]; + struct nfsd_drc_bucket *b; int type = rqstp->rq_cachetype; int rtn = RC_DOIT; rqstp->rq_cacherep = NULL; if (type == RC_NOCACHE) { - nfsdstats.rcnocache++; - return rtn; + nfsd_stats_rc_nocache_inc(); + goto out; } csum = nfsd_cache_csum(rqstp); @@ -408,47 +441,47 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) * Since the common case is a cache miss followed by an insert, * preallocate an entry. */ + nn = net_generic(SVC_NET(rqstp), nfsd_net_id); rp = nfsd_reply_cache_alloc(rqstp, csum, nn); - if (!rp) { - dprintk("nfsd: unable to allocate DRC entry!\n"); - return rtn; - } + if (!rp) + goto out; + b = nfsd_cache_bucket_find(rqstp->rq_xid, nn); spin_lock(&b->cache_lock); found = nfsd_cache_insert(b, rp, nn); - if (found != rp) { - nfsd_reply_cache_free_locked(NULL, rp, nn); - rp = found; + if (found != rp) goto found_entry; - } - nfsdstats.rcmisses++; + nfsd_stats_rc_misses_inc(); rqstp->rq_cacherep = rp; rp->c_state = RC_INPROG; atomic_inc(&nn->num_drc_entries); - nn->drc_mem_usage += sizeof(*rp); + nfsd_stats_drc_mem_usage_add(nn, sizeof(*rp)); - /* go ahead and prune the cache */ - prune_bucket(b, nn); - out: + nfsd_prune_bucket(b, nn); + +out_unlock: spin_unlock(&b->cache_lock); +out: return rtn; found_entry: /* We found a matching entry which is either in progress or done. */ - nfsdstats.rchits++; + nfsd_reply_cache_free_locked(NULL, rp, nn); + nfsd_stats_rc_hits_inc(); rtn = RC_DROPIT; + rp = found; /* Request being processed */ if (rp->c_state == RC_INPROG) - goto out; + goto out_trace; /* From the hall of fame of impractical attacks: * Is this a user who tries to snoop on the cache? */ rtn = RC_DOIT; if (!test_bit(RQ_SECURE, &rqstp->rq_flags) && rp->c_secure) - goto out; + goto out_trace; /* Compose RPC reply header */ switch (rp->c_type) { @@ -460,21 +493,26 @@ found_entry: break; case RC_REPLBUFF: if (!nfsd_cache_append(rqstp, &rp->c_replvec)) - goto out; /* should not happen */ + goto out_unlock; /* should not happen */ rtn = RC_REPLY; break; default: - printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type); - nfsd_reply_cache_free_locked(b, rp, nn); + WARN_ONCE(1, "nfsd: bad repcache type %d\n", rp->c_type); } - goto out; +out_trace: + trace_nfsd_drc_found(nn, rqstp, rtn); + goto out_unlock; } -/* - * Update a cache entry. This is called from nfsd_dispatch when - * the procedure has been executed and the complete reply is in - * rqstp->rq_res. +/** + * nfsd_cache_update - Update an entry in the duplicate reply cache. + * @rqstp: svc_rqst with a finished Reply + * @cachetype: which cache to update + * @statp: Reply's status code + * + * This is called from nfsd_dispatch when the procedure has been + * executed and the complete reply is in rqstp->rq_res. * * We're copying around data here rather than swapping buffers because * the toplevel loop requires max-sized buffers, which would be a waste @@ -487,13 +525,11 @@ found_entry: * nfsd failed to encode a reply that otherwise would have been cached. * In this case, nfsd_cache_update is called with statp == NULL. */ -void -nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) +void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) { struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct svc_cacherep *rp = rqstp->rq_cacherep; struct kvec *resv = &rqstp->rq_res.head[0], *cachv; - u32 hash; struct nfsd_drc_bucket *b; int len; size_t bufsize = 0; @@ -501,8 +537,7 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) if (!rp) return; - hash = nfsd_cache_hash(rp->c_key.k_xid, nn); - b = &nn->drc_hashtbl[hash]; + b = nfsd_cache_bucket_find(rp->c_key.k_xid, nn); len = resv->iov_len - ((char*)statp - (char*)resv->iov_base); len >>= 2; @@ -535,7 +570,7 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) return; } spin_lock(&b->cache_lock); - nn->drc_mem_usage += bufsize; + nfsd_stats_drc_mem_usage_add(nn, bufsize); lru_put_end(b, rp); rp->c_secure = test_bit(RQ_SECURE, &rqstp->rq_flags); rp->c_type = cachetype; @@ -569,28 +604,26 @@ nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data) * scraping this file for info should test the labels to ensure they're * getting the correct field. */ -static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) +int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) { - struct nfsd_net *nn = m->private; + struct nfsd_net *nn = net_generic(file_inode(m->file)->i_sb->s_fs_info, + nfsd_net_id); seq_printf(m, "max entries: %u\n", nn->max_drc_entries); seq_printf(m, "num entries: %u\n", - atomic_read(&nn->num_drc_entries)); + atomic_read(&nn->num_drc_entries)); seq_printf(m, "hash buckets: %u\n", 1 << nn->maskbits); - seq_printf(m, "mem usage: %u\n", nn->drc_mem_usage); - seq_printf(m, "cache hits: %u\n", nfsdstats.rchits); - seq_printf(m, "cache misses: %u\n", nfsdstats.rcmisses); - seq_printf(m, "not cached: %u\n", nfsdstats.rcnocache); - seq_printf(m, "payload misses: %u\n", nn->payload_misses); + seq_printf(m, "mem usage: %lld\n", + percpu_counter_sum_positive(&nn->counter[NFSD_NET_DRC_MEM_USAGE])); + seq_printf(m, "cache hits: %lld\n", + percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_HITS])); + seq_printf(m, "cache misses: %lld\n", + percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_MISSES])); + seq_printf(m, "not cached: %lld\n", + percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE])); + seq_printf(m, "payload misses: %lld\n", + percpu_counter_sum_positive(&nn->counter[NFSD_NET_PAYLOAD_MISSES])); seq_printf(m, "longest chain len: %u\n", nn->longest_chain); seq_printf(m, "cachesize at longest: %u\n", nn->longest_chain_cachesize); return 0; } - -int nfsd_reply_cache_stats_open(struct inode *inode, struct file *file) -{ - struct nfsd_net *nn = net_generic(file_inode(file)->i_sb->s_fs_info, - nfsd_net_id); - - return single_open(file, nfsd_reply_cache_stats_show, nn); -} diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index e109a1007704..dc74a947a440 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -25,6 +25,7 @@ #include "state.h" #include "netns.h" #include "pnfs.h" +#include "filecache.h" /* * We have a single directory with several nodes in it. @@ -32,6 +33,7 @@ enum { NFSD_Root = 1, NFSD_List, + NFSD_Export_Stats, NFSD_Export_features, NFSD_Fh, NFSD_FO_UnlockIP, @@ -44,6 +46,7 @@ enum { NFSD_Ports, NFSD_MaxBlkSize, NFSD_MaxConnections, + NFSD_Filecache, NFSD_SupportedEnctypes, /* * The below MUST come last. Otherwise we leave a hole in nfsd_files[] @@ -182,17 +185,7 @@ static int export_features_show(struct seq_file *m, void *v) return 0; } -static int export_features_open(struct inode *inode, struct file *file) -{ - return single_open(file, export_features_show, NULL); -} - -static const struct file_operations export_features_operations = { - .open = export_features_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(export_features); #if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE) static int supported_enctypes_show(struct seq_file *m, void *v) @@ -201,17 +194,7 @@ static int supported_enctypes_show(struct seq_file *m, void *v) return 0; } -static int supported_enctypes_open(struct inode *inode, struct file *file) -{ - return single_open(file, supported_enctypes_show, NULL); -} - -static const struct file_operations supported_enctypes_ops = { - .open = supported_enctypes_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(supported_enctypes); #endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */ static const struct file_operations pool_stats_operations = { @@ -221,12 +204,9 @@ static const struct file_operations pool_stats_operations = { .release = nfsd_pool_stats_release, }; -static const struct file_operations reply_cache_stats_operations = { - .open = nfsd_reply_cache_stats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(nfsd_reply_cache_stats); + +DEFINE_SHOW_ATTRIBUTE(nfsd_file_cache_stats); /*----------------------------------------------------------------------------*/ /* @@ -238,7 +218,7 @@ static inline struct net *netns(struct file *file) return file_inode(file)->i_sb->s_fs_info; } -/** +/* * write_unlock_ip - Release all locks used by a client * * Experimental. @@ -277,7 +257,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) return nlmsvc_unlock_all_by_ip(sap); } -/** +/* * write_unlock_fs - Release all locks on a local file system * * Experimental. @@ -327,7 +307,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size) return error; } -/** +/* * write_filehandle - Get a variable-length NFS file handle by path * * On input, the buffer contains a '\n'-terminated C string comprised of @@ -351,7 +331,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size) static ssize_t write_filehandle(struct file *file, char *buf, size_t size) { char *dname, *path; - int uninitialized_var(maxsize); + int maxsize; char *mesg = buf; int len; struct auth_domain *dom; @@ -394,15 +374,15 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) auth_domain_put(dom); if (len) return len; - + mesg = buf; len = SIMPLE_TRANSACTION_LIMIT; - qword_addhex(&mesg, &len, (char*)&fh.fh_base, fh.fh_size); + qword_addhex(&mesg, &len, fh.fh_raw, fh.fh_size); mesg[-1] = '\n'; - return mesg - buf; + return mesg - buf; } -/** +/* * write_threads - Start NFSD, or report the current number of running threads * * Input: @@ -452,7 +432,7 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size) return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%d\n", rv); } -/** +/* * write_pool_threads - Set or report the current number of threads per pool * * Input: @@ -632,7 +612,6 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) } /* Now write current state into reply buffer */ - len = 0; sep = ""; remaining = SIMPLE_TRANSACTION_LIMIT; for (num=2 ; num <= 4 ; num++) { @@ -661,7 +640,7 @@ out: return tlen + len; } -/** +/* * write_versions - Set or report the available NFS protocol versions * * Input: @@ -741,13 +720,12 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred return err; err = svc_addsock(nn->nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT, cred); - if (err < 0) { - nfsd_destroy(net); - return err; - } - /* Decrease the count, but don't shut down the service */ - nn->nfsd_serv->sv_nrthreads--; + if (err >= 0 && + !nn->nfsd_serv->sv_nrthreads && !xchg(&nn->keep_active, 1)) + svc_get(nn->nfsd_serv); + + nfsd_put(net); return err; } @@ -772,27 +750,29 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cr if (err != 0) return err; - err = svc_create_xprt(nn->nfsd_serv, transport, net, - PF_INET, port, SVC_SOCK_ANONYMOUS, cred); + err = svc_xprt_create(nn->nfsd_serv, transport, net, + PF_INET, port, SVC_SOCK_ANONYMOUS, cred); if (err < 0) goto out_err; - err = svc_create_xprt(nn->nfsd_serv, transport, net, - PF_INET6, port, SVC_SOCK_ANONYMOUS, cred); + err = svc_xprt_create(nn->nfsd_serv, transport, net, + PF_INET6, port, SVC_SOCK_ANONYMOUS, cred); if (err < 0 && err != -EAFNOSUPPORT) goto out_close; - /* Decrease the count, but don't shut down the service */ - nn->nfsd_serv->sv_nrthreads--; + if (!nn->nfsd_serv->sv_nrthreads && !xchg(&nn->keep_active, 1)) + svc_get(nn->nfsd_serv); + + nfsd_put(net); return 0; out_close: xprt = svc_find_xprt(nn->nfsd_serv, transport, net, PF_INET, port); if (xprt != NULL) { - svc_close_xprt(xprt); + svc_xprt_close(xprt); svc_xprt_put(xprt); } out_err: - nfsd_destroy(net); + nfsd_put(net); return err; } @@ -811,7 +791,7 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size, return -EINVAL; } -/** +/* * write_ports - Pass a socket file descriptor or transport name to listen on * * Input: @@ -867,7 +847,7 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size) int nfsd_max_blksize; -/** +/* * write_maxblksize - Set or report the current NFS blksize * * Input: @@ -917,7 +897,7 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) nfsd_max_blksize); } -/** +/* * write_maxconn - Set or report the current max number of connections * * Input: @@ -998,7 +978,7 @@ static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, return rv; } -/** +/* * write_leasetime - Set or report the current NFSv4 lease time * * Input: @@ -1025,7 +1005,7 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size) return nfsd4_write_time(file, buf, size, &nn->nfsd4_lease, nn); } -/** +/* * write_gracetime - Set or report current NFSv4 grace period time * * As above, but sets the time of the NFSv4 grace period. @@ -1069,7 +1049,7 @@ static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size, nfs4_recoverydir()); } -/** +/* * write_recoverydir - Set or report the pathname of the recovery directory * * Input: @@ -1101,7 +1081,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) return rv; } -/** +/* * write_v4_end_grace - release grace period for nfsd's v4.x lock manager * * Input: @@ -1165,6 +1145,7 @@ static struct inode *nfsd_get_inode(struct super_block *sb, umode_t mode) inode->i_fop = &simple_dir_operations; inode->i_op = &simple_dir_inode_operations; inc_nlink(inode); + break; default: break; } @@ -1244,7 +1225,8 @@ static void nfsdfs_remove_file(struct inode *dir, struct dentry *dentry) clear_ncl(d_inode(dentry)); dget(dentry); ret = simple_unlink(dir, dentry); - d_delete(dentry); + d_drop(dentry); + fsnotify_unlink(dir, dentry); dput(dentry); WARN_ON_ONCE(ret); } @@ -1265,7 +1247,8 @@ static void nfsdfs_remove_files(struct dentry *root) /* XXX: cut'n'paste from simple_fill_super; figure out if we could share * code instead. */ static int nfsdfs_create_files(struct dentry *root, - const struct tree_descr *files) + const struct tree_descr *files, + struct dentry **fdentries) { struct inode *dir = d_inode(root); struct inode *inode; @@ -1274,8 +1257,6 @@ static int nfsdfs_create_files(struct dentry *root, inode_lock(dir); for (i = 0; files->name && files->name[0]; i++, files++) { - if (!files->name) - continue; dentry = d_alloc_name(root, files->name); if (!dentry) goto out; @@ -1289,6 +1270,8 @@ static int nfsdfs_create_files(struct dentry *root, inode->i_private = __get_nfsdfs_client(dir); d_add(dentry, inode); fsnotify_create(dir, dentry); + if (fdentries) + fdentries[i] = dentry; } inode_unlock(dir); return 0; @@ -1300,8 +1283,9 @@ out: /* on success, returns positive number unique to that client. */ struct dentry *nfsd_client_mkdir(struct nfsd_net *nn, - struct nfsdfs_client *ncl, u32 id, - const struct tree_descr *files) + struct nfsdfs_client *ncl, u32 id, + const struct tree_descr *files, + struct dentry **fdentries) { struct dentry *dentry; char name[11]; @@ -1312,7 +1296,7 @@ struct dentry *nfsd_client_mkdir(struct nfsd_net *nn, dentry = nfsd_mkdir(nn->nfsd_client_dir, ncl, name); if (IS_ERR(dentry)) /* XXX: tossing errors? */ return NULL; - ret = nfsdfs_create_files(dentry, files); + ret = nfsdfs_create_files(dentry, files, fdentries); if (ret) { nfsd_client_rmdir(dentry); return NULL; @@ -1333,7 +1317,9 @@ void nfsd_client_rmdir(struct dentry *dentry) dget(dentry); ret = simple_rmdir(dir, dentry); WARN_ON_ONCE(ret); - d_delete(dentry); + d_drop(dentry); + fsnotify_rmdir(dir, dentry); + dput(dentry); inode_unlock(dir); } @@ -1346,8 +1332,10 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc) static const struct tree_descr nfsd_files[] = { [NFSD_List] = {"exports", &exports_nfsd_operations, S_IRUGO}, + /* Per-export io stats use same ops as exports file */ + [NFSD_Export_Stats] = {"export_stats", &exports_nfsd_operations, S_IRUGO}, [NFSD_Export_features] = {"export_features", - &export_features_operations, S_IRUGO}, + &export_features_fops, S_IRUGO}, [NFSD_FO_UnlockIP] = {"unlock_ip", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_FO_UnlockFS] = {"unlock_filesystem", @@ -1356,13 +1344,16 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc) [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Pool_Stats] = {"pool_stats", &pool_stats_operations, S_IRUGO}, - [NFSD_Reply_Cache_Stats] = {"reply_cache_stats", &reply_cache_stats_operations, S_IRUGO}, + [NFSD_Reply_Cache_Stats] = {"reply_cache_stats", + &nfsd_reply_cache_stats_fops, S_IRUGO}, [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO}, + [NFSD_Filecache] = {"filecache", &nfsd_file_cache_stats_fops, S_IRUGO}, #if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE) - [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO}, + [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", + &supported_enctypes_fops, S_IRUGO}, #endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */ #ifdef CONFIG_NFSD_V4 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, @@ -1411,6 +1402,8 @@ static void nfsd_umount(struct super_block *sb) { struct net *net = sb->s_fs_info; + nfsd_shutdown_threads(net); + kill_litter_super(sb); put_net(net); } @@ -1450,7 +1443,6 @@ unsigned int nfsd_net_id; static __net_init int nfsd_init_net(struct net *net) { int retval; - struct vfsmount *mnt; struct nfsd_net *nn = net_generic(net, nfsd_net_id); retval = nfsd_export_init(net); @@ -1461,32 +1453,19 @@ static __net_init int nfsd_init_net(struct net *net) goto out_idmap_error; nn->nfsd_versions = NULL; nn->nfsd4_minorversions = NULL; - retval = nfsd_reply_cache_init(nn); + retval = nfsd4_init_leases_net(nn); if (retval) goto out_drc_error; - nn->nfsd4_lease = 90; /* default lease time */ - nn->nfsd4_grace = 90; - nn->somebody_reclaimed = false; - nn->track_reclaim_completes = false; - nn->clverifier_counter = prandom_u32(); - nn->clientid_base = prandom_u32(); - nn->clientid_counter = nn->clientid_base + 1; - nn->s2s_cp_cl_id = nn->clientid_counter++; - - atomic_set(&nn->ntf_refcnt, 0); - init_waitqueue_head(&nn->ntf_wq); - seqlock_init(&nn->boot_lock); - - mnt = vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL); - if (IS_ERR(mnt)) { - retval = PTR_ERR(mnt); - goto out_mount_err; - } - nn->nfsd_mnt = mnt; + retval = nfsd_reply_cache_init(nn); + if (retval) + goto out_cache_error; + get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key)); + seqlock_init(&nn->writeverf_lock); + return 0; -out_mount_err: - nfsd_reply_cache_shutdown(nn); +out_cache_error: + nfsd4_leases_net_shutdown(nn); out_drc_error: nfsd_idmap_shutdown(net); out_idmap_error: @@ -1499,11 +1478,11 @@ static __net_exit void nfsd_exit_net(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); - mntput(nn->nfsd_mnt); nfsd_reply_cache_shutdown(nn); nfsd_idmap_shutdown(net); nfsd_export_shutdown(net); nfsd_netns_free_versions(net_generic(net, nfsd_net_id)); + nfsd4_leases_net_shutdown(nn); } static struct pernet_operations nfsd_net_ops = { @@ -1516,59 +1495,70 @@ static struct pernet_operations nfsd_net_ops = { static int __init init_nfsd(void) { int retval; - printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n"); - retval = register_pernet_subsys(&nfsd_net_ops); - if (retval < 0) - return retval; - retval = register_cld_notifier(); - if (retval) - goto out_unregister_pernet; retval = nfsd4_init_slabs(); if (retval) - goto out_unregister_notifier; + return retval; retval = nfsd4_init_pnfs(); if (retval) goto out_free_slabs; - nfsd_fault_inject_init(); /* nfsd fault injection controls */ - nfsd_stat_init(); /* Statistics */ + retval = nfsd_stat_init(); /* Statistics */ + if (retval) + goto out_free_pnfs; + retval = nfsd_drc_slab_create(); + if (retval) + goto out_free_stat; nfsd_lockd_init(); /* lockd->nfsd callbacks */ retval = create_proc_exports_entry(); if (retval) goto out_free_lockd; + retval = register_pernet_subsys(&nfsd_net_ops); + if (retval < 0) + goto out_free_exports; + retval = register_cld_notifier(); + if (retval) + goto out_free_subsys; + retval = nfsd4_create_laundry_wq(); + if (retval) + goto out_free_cld; retval = register_filesystem(&nfsd_fs_type); if (retval) goto out_free_all; return 0; out_free_all: + nfsd4_destroy_laundry_wq(); +out_free_cld: + unregister_cld_notifier(); +out_free_subsys: + unregister_pernet_subsys(&nfsd_net_ops); +out_free_exports: remove_proc_entry("fs/nfs/exports", NULL); remove_proc_entry("fs/nfs", NULL); out_free_lockd: nfsd_lockd_shutdown(); + nfsd_drc_slab_free(); +out_free_stat: nfsd_stat_shutdown(); - nfsd_fault_inject_cleanup(); +out_free_pnfs: nfsd4_exit_pnfs(); out_free_slabs: nfsd4_free_slabs(); -out_unregister_notifier: - unregister_cld_notifier(); -out_unregister_pernet: - unregister_pernet_subsys(&nfsd_net_ops); return retval; } static void __exit exit_nfsd(void) { + unregister_filesystem(&nfsd_fs_type); + nfsd4_destroy_laundry_wq(); + unregister_cld_notifier(); + unregister_pernet_subsys(&nfsd_net_ops); + nfsd_drc_slab_free(); remove_proc_entry("fs/nfs/exports", NULL); remove_proc_entry("fs/nfs", NULL); nfsd_stat_shutdown(); nfsd_lockd_shutdown(); nfsd4_free_slabs(); nfsd4_exit_pnfs(); - nfsd_fault_inject_cleanup(); - unregister_filesystem(&nfsd_fs_type); - unregister_cld_notifier(); - unregister_pernet_subsys(&nfsd_net_ops); } MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>"); diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 2ab5569126b8..09726c5b9a31 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -24,8 +24,8 @@ #include <uapi/linux/nfsd/debug.h> #include "netns.h" -#include "stats.h" #include "export.h" +#include "stats.h" #undef ifdebug #ifdef CONFIG_SUNRPC_DEBUG @@ -74,6 +74,16 @@ extern unsigned long nfsd_drc_mem_used; extern const struct seq_operations nfs_exports_op; /* + * Common void argument and result helpers + */ +struct nfsd_voidargs { }; +struct nfsd_voidres { }; +bool nfssvc_decode_voidarg(struct svc_rqst *rqstp, + struct xdr_stream *xdr); +bool nfssvc_encode_voidres(struct svc_rqst *rqstp, + struct xdr_stream *xdr); + +/* * Function prototypes. */ int nfsd_svc(int nrservs, struct net *net, const struct cred *cred); @@ -85,8 +95,11 @@ int nfsd_get_nrthreads(int n, int *, struct net *); int nfsd_set_nrthreads(int n, int *, struct net *); int nfsd_pool_stats_open(struct inode *, struct file *); int nfsd_pool_stats_release(struct inode *, struct file *); +void nfsd_shutdown_threads(struct net *net); + +void nfsd_put(struct net *net); -void nfsd_destroy(struct net *net); +bool i_am_nfsd(void); struct nfsdfs_client { struct kref cl_ref; @@ -95,9 +108,12 @@ struct nfsdfs_client { struct nfsdfs_client *get_nfsdfs_client(struct inode *); struct dentry *nfsd_client_mkdir(struct nfsd_net *nn, - struct nfsdfs_client *ncl, u32 id, const struct tree_descr *); + struct nfsdfs_client *ncl, u32 id, + const struct tree_descr *, + struct dentry **fdentries); void nfsd_client_rmdir(struct dentry *dentry); + #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) #ifdef CONFIG_NFSD_V2_ACL extern const struct svc_version nfsd_acl_version2; @@ -146,6 +162,9 @@ void nfs4_state_shutdown_net(struct net *net); int nfs4_reset_recoverydir(char *recdir); char * nfs4_recoverydir(void); bool nfsd4_spo_must_allow(struct svc_rqst *rqstp); +int nfsd4_create_laundry_wq(void); +void nfsd4_destroy_laundry_wq(void); +bool nfsd_wait_for_delegreturn(struct svc_rqst *rqstp, struct inode *inode); #else static inline int nfsd4_init_slabs(void) { return 0; } static inline void nfsd4_free_slabs(void) { } @@ -159,6 +178,13 @@ static inline bool nfsd4_spo_must_allow(struct svc_rqst *rqstp) { return false; } +static inline int nfsd4_create_laundry_wq(void) { return 0; }; +static inline void nfsd4_destroy_laundry_wq(void) {}; +static inline bool nfsd_wait_for_delegreturn(struct svc_rqst *rqstp, + struct inode *inode) +{ + return false; +} #endif /* @@ -281,6 +307,8 @@ void nfsd_lockd_shutdown(void); #define nfserr_wrong_lfs cpu_to_be32(NFS4ERR_WRONG_LFS) #define nfserr_badlabel cpu_to_be32(NFS4ERR_BADLABEL) #define nfserr_file_open cpu_to_be32(NFS4ERR_FILE_OPEN) +#define nfserr_xattr2big cpu_to_be32(NFS4ERR_XATTR2BIG) +#define nfserr_noxattr cpu_to_be32(NFS4ERR_NOXATTR) /* error codes for internal use */ /* if a request fails due to kmalloc failure, it gets dropped. @@ -318,6 +346,10 @@ void nfsd_lockd_shutdown(void); #define COMPOUND_ERR_SLACK_SPACE 16 /* OP_SETATTR */ #define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */ +#define NFSD_COURTESY_CLIENT_TIMEOUT (24 * 60 * 60) /* seconds */ +#define NFSD_CLIENT_MAX_TRIM_PER_RUN 128 +#define NFS4_CLIENTS_PER_GB 1024 +#define NFSD_DELEGRETURN_TIMEOUT (HZ / 34) /* 30ms */ /* * The following attributes are currently not supported by the NFSv4 server: @@ -346,7 +378,7 @@ void nfsd_lockd_shutdown(void); | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP | FATTR4_WORD1_RAWDEV \ | FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | FATTR4_WORD1_SPACE_TOTAL \ | FATTR4_WORD1_SPACE_USED | FATTR4_WORD1_TIME_ACCESS | FATTR4_WORD1_TIME_ACCESS_SET \ - | FATTR4_WORD1_TIME_DELTA | FATTR4_WORD1_TIME_METADATA \ + | FATTR4_WORD1_TIME_DELTA | FATTR4_WORD1_TIME_METADATA | FATTR4_WORD1_TIME_CREATE \ | FATTR4_WORD1_TIME_MODIFY | FATTR4_WORD1_TIME_MODIFY_SET | FATTR4_WORD1_MOUNTED_ON_FILEID) #define NFSD4_SUPPORTED_ATTRS_WORD2 0 @@ -380,9 +412,9 @@ void nfsd_lockd_shutdown(void); #define NFSD4_2_SUPPORTED_ATTRS_WORD2 \ (NFSD4_1_SUPPORTED_ATTRS_WORD2 | \ - FATTR4_WORD2_CHANGE_ATTR_TYPE | \ FATTR4_WORD2_MODE_UMASK | \ - NFSD4_2_SECURITY_ATTRS) + NFSD4_2_SECURITY_ATTRS | \ + FATTR4_WORD2_XATTR_SUPPORT) extern const u32 nfsd_suppattrs[3][3]; @@ -442,7 +474,8 @@ static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval) (FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL) #define NFSD_WRITEABLE_ATTRS_WORD1 \ (FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ - | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) + | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_CREATE \ + | FATTR4_WORD1_TIME_MODIFY_SET) #ifdef CONFIG_NFSD_V4_SECURITY_LABEL #define MAYBE_FATTR4_WORD2_SECURITY_LABEL \ FATTR4_WORD2_SECURITY_LABEL @@ -468,12 +501,22 @@ static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval) extern int nfsd4_is_junction(struct dentry *dentry); extern int register_cld_notifier(void); extern void unregister_cld_notifier(void); +#ifdef CONFIG_NFSD_V4_2_INTER_SSC +extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn); +#endif + +extern int nfsd4_init_leases_net(struct nfsd_net *nn); +extern void nfsd4_leases_net_shutdown(struct nfsd_net *nn); + #else /* CONFIG_NFSD_V4 */ static inline int nfsd4_is_junction(struct dentry *dentry) { return 0; } +static inline int nfsd4_init_leases_net(struct nfsd_net *nn) { return 0; }; +static inline void nfsd4_leases_net_shutdown(struct nfsd_net *nn) {}; + #define register_cld_notifier() 0 #define unregister_cld_notifier() do { } while(0) diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index b319080288c3..8c52b6c9d31a 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -14,6 +14,7 @@ #include "nfsd.h" #include "vfs.h" #include "auth.h" +#include "trace.h" #define NFSDDBG_FACILITY NFSDDBG_FH @@ -39,7 +40,8 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry) /* make sure parents give x permission to user */ int err; parent = dget_parent(tdentry); - err = inode_permission(d_inode(parent), MAY_EXEC); + err = inode_permission(&init_user_ns, + d_inode(parent), MAY_EXEC); if (err < 0) { dput(parent); break; @@ -152,11 +154,12 @@ static inline __be32 check_pseudo_root(struct svc_rqst *rqstp, static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) { struct knfsd_fh *fh = &fhp->fh_handle; - struct fid *fid = NULL, sfid; + struct fid *fid = NULL; struct svc_export *exp; struct dentry *dentry; int fileid_type; int data_left = fh->fh_size/4; + int len; __be32 error; error = nfserr_stale; @@ -165,55 +168,45 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) if (rqstp->rq_vers == 4 && fh->fh_size == 0) return nfserr_nofilehandle; - if (fh->fh_version == 1) { - int len; - - if (--data_left < 0) - return error; - if (fh->fh_auth_type != 0) - return error; - len = key_len(fh->fh_fsid_type) / 4; - if (len == 0) - return error; - if (fh->fh_fsid_type == FSID_MAJOR_MINOR) { - /* deprecated, convert to type 3 */ - len = key_len(FSID_ENCODE_DEV)/4; - fh->fh_fsid_type = FSID_ENCODE_DEV; - /* - * struct knfsd_fh uses host-endian fields, which are - * sometimes used to hold net-endian values. This - * confuses sparse, so we must use __force here to - * keep it from complaining. - */ - fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl((__force __be32)fh->fh_fsid[0]), - ntohl((__force __be32)fh->fh_fsid[1]))); - fh->fh_fsid[1] = fh->fh_fsid[2]; - } - data_left -= len; - if (data_left < 0) - return error; - exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_fsid); - fid = (struct fid *)(fh->fh_fsid + len); - } else { - __u32 tfh[2]; - dev_t xdev; - ino_t xino; + if (fh->fh_version != 1) + return error; - if (fh->fh_size != NFS_FHSIZE) - return error; - /* assume old filehandle format */ - xdev = old_decode_dev(fh->ofh_xdev); - xino = u32_to_ino_t(fh->ofh_xino); - mk_fsid(FSID_DEV, tfh, xdev, xino, 0, NULL); - exp = rqst_exp_find(rqstp, FSID_DEV, tfh); + if (--data_left < 0) + return error; + if (fh->fh_auth_type != 0) + return error; + len = key_len(fh->fh_fsid_type) / 4; + if (len == 0) + return error; + if (fh->fh_fsid_type == FSID_MAJOR_MINOR) { + /* deprecated, convert to type 3 */ + len = key_len(FSID_ENCODE_DEV)/4; + fh->fh_fsid_type = FSID_ENCODE_DEV; + /* + * struct knfsd_fh uses host-endian fields, which are + * sometimes used to hold net-endian values. This + * confuses sparse, so we must use __force here to + * keep it from complaining. + */ + fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl((__force __be32)fh->fh_fsid[0]), + ntohl((__force __be32)fh->fh_fsid[1]))); + fh->fh_fsid[1] = fh->fh_fsid[2]; } + data_left -= len; + if (data_left < 0) + return error; + exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_fsid); + fid = (struct fid *)(fh->fh_fsid + len); error = nfserr_stale; - if (PTR_ERR(exp) == -ENOENT) - return error; + if (IS_ERR(exp)) { + trace_nfsd_set_fh_dentry_badexport(rqstp, fhp, PTR_ERR(exp)); + + if (PTR_ERR(exp) == -ENOENT) + return error; - if (IS_ERR(exp)) return nfserrno(PTR_ERR(exp)); + } if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) { /* Elevate privileges so that the lack of 'r' or 'x' @@ -248,25 +241,25 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) if (rqstp->rq_vers > 2) error = nfserr_badhandle; - if (fh->fh_version != 1) { - sfid.i32.ino = fh->ofh_ino; - sfid.i32.gen = fh->ofh_generation; - sfid.i32.parent_ino = fh->ofh_dirino; - fid = &sfid; - data_left = 3; - if (fh->ofh_dirino == 0) - fileid_type = FILEID_INO32_GEN; - else - fileid_type = FILEID_INO32_GEN_PARENT; - } else - fileid_type = fh->fh_fileid_type; + fileid_type = fh->fh_fileid_type; if (fileid_type == FILEID_ROOT) dentry = dget(exp->ex_path.dentry); else { - dentry = exportfs_decode_fh(exp->ex_path.mnt, fid, - data_left, fileid_type, - nfsd_acceptable, exp); + dentry = exportfs_decode_fh_raw(exp->ex_path.mnt, fid, + data_left, fileid_type, + nfsd_acceptable, exp); + if (IS_ERR_OR_NULL(dentry)) { + trace_nfsd_set_fh_dentry_badhandle(rqstp, fhp, + dentry ? PTR_ERR(dentry) : -ESTALE); + switch (PTR_ERR(dentry)) { + case -ENOMEM: + case -ETIMEDOUT: + break; + default: + dentry = ERR_PTR(-ESTALE); + } + } } if (dentry == NULL) goto out; @@ -284,6 +277,20 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) fhp->fh_dentry = dentry; fhp->fh_export = exp; + + switch (rqstp->rq_vers) { + case 4: + if (dentry->d_sb->s_export_op->flags & EXPORT_OP_NOATOMIC_ATTR) + fhp->fh_no_atomic_attr = true; + break; + case 3: + if (dentry->d_sb->s_export_op->flags & EXPORT_OP_NOWCC) + fhp->fh_no_wcc = true; + break; + case 2: + fhp->fh_no_wcc = true; + } + return 0; out: exp_put(exp); @@ -320,12 +327,10 @@ out: __be32 fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access) { - struct svc_export *exp; + struct svc_export *exp = NULL; struct dentry *dentry; __be32 error; - dprintk("nfsd: fh_verify(%s)\n", SVCFH_fmt(fhp)); - if (!fhp->fh_dentry) { error = nfsd_set_fh_dentry(rqstp, fhp); if (error) @@ -333,6 +338,9 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access) } dentry = fhp->fh_dentry; exp = fhp->fh_export; + + trace_nfsd_fh_verify(rqstp, fhp, type, access); + /* * We still have to do all these permission checks, even when * fh_dentry is already set: @@ -384,16 +392,10 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access) skip_pseudoflavor_check: /* Finally, check access permissions. */ error = nfsd_permission(rqstp, exp, dentry, access); - - if (error) { - dprintk("fh_verify: %pd2 permission failure, " - "acc=%x, error=%d\n", - dentry, - access, ntohl(error)); - } out: + trace_nfsd_fh_verify_err(rqstp, fhp, type, access, error); if (error == nfserr_stale) - nfsdstats.fh_stale++; + nfsd_stats_fh_stale_inc(exp); return error; } @@ -422,20 +424,6 @@ static void _fh_update(struct svc_fh *fhp, struct svc_export *exp, } } -/* - * for composing old style file handles - */ -static inline void _fh_update_old(struct dentry *dentry, - struct svc_export *exp, - struct knfsd_fh *fh) -{ - fh->ofh_ino = ino_t_to_u32(d_inode(dentry)->i_ino); - fh->ofh_generation = d_inode(dentry)->i_generation; - if (d_is_dir(dentry) || - (exp->ex_flags & NFSEXP_NOSUBTREECHECK)) - fh->ofh_dirino = 0; -} - static bool is_root_export(struct svc_export *exp) { return exp->ex_path.dentry == exp->ex_path.dentry->d_sb->s_root; @@ -452,7 +440,7 @@ static bool fsid_type_ok_for_exp(u8 fsid_type, struct svc_export *exp) case FSID_DEV: if (!old_valid_dev(exp_sb(exp)->s_dev)) return false; - /* FALL THROUGH */ + fallthrough; case FSID_MAJOR_MINOR: case FSID_ENCODE_DEV: return exp_sb(exp)->s_type->fs_flags & FS_REQUIRES_DEV; @@ -462,7 +450,7 @@ static bool fsid_type_ok_for_exp(u8 fsid_type, struct svc_export *exp) case FSID_UUID16: if (!is_root_export(exp)) return false; - /* fall through */ + fallthrough; case FSID_UUID4_INUM: case FSID_UUID16_INUM: return exp->ex_uuid != NULL; @@ -532,9 +520,6 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, /* ref_fh is a reference file handle. * if it is non-null and for the same filesystem, then we should compose * a filehandle which is of the same version, where possible. - * Currently, that means that if ref_fh->fh_handle.fh_version == 0xca - * Then create a 32byte filehandle using nfs_fhbase_old - * */ struct inode * inode = d_inode(dentry); @@ -552,10 +537,13 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, */ set_version_and_fsid_type(fhp, exp, ref_fh); + /* If we have a ref_fh, then copy the fh_no_wcc setting from it. */ + fhp->fh_no_wcc = ref_fh ? ref_fh->fh_no_wcc : false; + if (ref_fh == fhp) fh_put(ref_fh); - if (fhp->fh_locked || fhp->fh_dentry) { + if (fhp->fh_dentry) { printk(KERN_ERR "fh_compose: fh %pd2 not initialized!\n", dentry); } @@ -567,35 +555,21 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, fhp->fh_dentry = dget(dentry); /* our internal copy */ fhp->fh_export = exp_get(exp); - if (fhp->fh_handle.fh_version == 0xca) { - /* old style filehandle please */ - memset(&fhp->fh_handle.fh_base, 0, NFS_FHSIZE); - fhp->fh_handle.fh_size = NFS_FHSIZE; - fhp->fh_handle.ofh_dcookie = 0xfeebbaca; - fhp->fh_handle.ofh_dev = old_encode_dev(ex_dev); - fhp->fh_handle.ofh_xdev = fhp->fh_handle.ofh_dev; - fhp->fh_handle.ofh_xino = - ino_t_to_u32(d_inode(exp->ex_path.dentry)->i_ino); - fhp->fh_handle.ofh_dirino = ino_t_to_u32(parent_ino(dentry)); - if (inode) - _fh_update_old(dentry, exp, &fhp->fh_handle); - } else { - fhp->fh_handle.fh_size = - key_len(fhp->fh_handle.fh_fsid_type) + 4; - fhp->fh_handle.fh_auth_type = 0; - - mk_fsid(fhp->fh_handle.fh_fsid_type, - fhp->fh_handle.fh_fsid, - ex_dev, - d_inode(exp->ex_path.dentry)->i_ino, - exp->ex_fsid, exp->ex_uuid); - - if (inode) - _fh_update(fhp, exp, dentry); - if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) { - fh_put(fhp); - return nfserr_opnotsupp; - } + fhp->fh_handle.fh_size = + key_len(fhp->fh_handle.fh_fsid_type) + 4; + fhp->fh_handle.fh_auth_type = 0; + + mk_fsid(fhp->fh_handle.fh_fsid_type, + fhp->fh_handle.fh_fsid, + ex_dev, + d_inode(exp->ex_path.dentry)->i_ino, + exp->ex_fsid, exp->ex_uuid); + + if (inode) + _fh_update(fhp, exp, dentry); + if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) { + fh_put(fhp); + return nfserr_opnotsupp; } return 0; @@ -616,16 +590,12 @@ fh_update(struct svc_fh *fhp) dentry = fhp->fh_dentry; if (d_really_is_negative(dentry)) goto out_negative; - if (fhp->fh_handle.fh_version != 1) { - _fh_update_old(dentry, fhp->fh_export, &fhp->fh_handle); - } else { - if (fhp->fh_handle.fh_fileid_type != FILEID_ROOT) - return 0; + if (fhp->fh_handle.fh_fileid_type != FILEID_ROOT) + return 0; - _fh_update(fhp, fhp->fh_export, dentry); - if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) - return nfserr_opnotsupp; - } + _fh_update(fhp, fhp->fh_export, dentry); + if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) + return nfserr_opnotsupp; return 0; out_bad: printk(KERN_ERR "fh_update: fh not verified!\n"); @@ -636,6 +606,85 @@ out_negative: return nfserr_serverfault; } +/** + * fh_fill_pre_attrs - Fill in pre-op attributes + * @fhp: file handle to be updated + * + */ +void fh_fill_pre_attrs(struct svc_fh *fhp) +{ + bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE); + struct inode *inode; + struct kstat stat; + __be32 err; + + if (fhp->fh_no_wcc || fhp->fh_pre_saved) + return; + + inode = d_inode(fhp->fh_dentry); + err = fh_getattr(fhp, &stat); + if (err) { + /* Grab the times from inode anyway */ + stat.mtime = inode->i_mtime; + stat.ctime = inode->i_ctime; + stat.size = inode->i_size; + } + if (v4) + fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode); + + fhp->fh_pre_mtime = stat.mtime; + fhp->fh_pre_ctime = stat.ctime; + fhp->fh_pre_size = stat.size; + fhp->fh_pre_saved = true; +} + +/** + * fh_fill_post_attrs - Fill in post-op attributes + * @fhp: file handle to be updated + * + */ +void fh_fill_post_attrs(struct svc_fh *fhp) +{ + bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE); + struct inode *inode = d_inode(fhp->fh_dentry); + __be32 err; + + if (fhp->fh_no_wcc) + return; + + if (fhp->fh_post_saved) + printk("nfsd: inode locked twice during operation.\n"); + + err = fh_getattr(fhp, &fhp->fh_post_attr); + if (err) { + fhp->fh_post_saved = false; + fhp->fh_post_attr.ctime = inode->i_ctime; + } else + fhp->fh_post_saved = true; + if (v4) + fhp->fh_post_change = + nfsd4_change_attribute(&fhp->fh_post_attr, inode); +} + +/** + * fh_fill_both_attrs - Fill pre-op and post-op attributes + * @fhp: file handle to be updated + * + * This is used when the directory wasn't changed, but wcc attributes + * are needed anyway. + */ +void fh_fill_both_attrs(struct svc_fh *fhp) +{ + fh_fill_post_attrs(fhp); + if (!fhp->fh_post_saved) + return; + fhp->fh_pre_change = fhp->fh_post_change; + fhp->fh_pre_mtime = fhp->fh_post_attr.mtime; + fhp->fh_pre_ctime = fhp->fh_post_attr.ctime; + fhp->fh_pre_size = fhp->fh_post_attr.size; + fhp->fh_pre_saved = true; +} + /* * Release a file handle. */ @@ -645,16 +694,16 @@ fh_put(struct svc_fh *fhp) struct dentry * dentry = fhp->fh_dentry; struct svc_export * exp = fhp->fh_export; if (dentry) { - fh_unlock(fhp); fhp->fh_dentry = NULL; dput(dentry); - fh_clear_wcc(fhp); + fh_clear_pre_post_attrs(fhp); } fh_drop_write(fhp); if (exp) { exp_put(exp); fhp->fh_export = NULL; } + fhp->fh_no_wcc = false; return; } @@ -664,20 +713,15 @@ fh_put(struct svc_fh *fhp) char * SVCFH_fmt(struct svc_fh *fhp) { struct knfsd_fh *fh = &fhp->fh_handle; + static char buf[2+1+1+64*3+1]; - static char buf[80]; - sprintf(buf, "%d: %08x %08x %08x %08x %08x %08x", - fh->fh_size, - fh->fh_base.fh_pad[0], - fh->fh_base.fh_pad[1], - fh->fh_base.fh_pad[2], - fh->fh_base.fh_pad[3], - fh->fh_base.fh_pad[4], - fh->fh_base.fh_pad[5]); + if (fh->fh_size < 0 || fh->fh_size> 64) + return "bad-fh"; + sprintf(buf, "%d: %*ph", fh->fh_size, fh->fh_size, fh->fh_raw); return buf; } -enum fsid_source fsid_source(struct svc_fh *fhp) +enum fsid_source fsid_source(const struct svc_fh *fhp) { if (fhp->fh_handle.fh_version != 1) return FSIDSOURCE_DEV; diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 56cfbc361561..c3ae6414fc5c 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -10,8 +10,56 @@ #include <linux/crc32.h> #include <linux/sunrpc/svc.h> -#include <uapi/linux/nfsd/nfsfh.h> #include <linux/iversion.h> +#include <linux/exportfs.h> +#include <linux/nfs4.h> + +/* + * The file handle starts with a sequence of four-byte words. + * The first word contains a version number (1) and three descriptor bytes + * that tell how the remaining 3 variable length fields should be handled. + * These three bytes are auth_type, fsid_type and fileid_type. + * + * All four-byte values are in host-byte-order. + * + * The auth_type field is deprecated and must be set to 0. + * + * The fsid_type identifies how the filesystem (or export point) is + * encoded. + * Current values: + * 0 - 4 byte device id (ms-2-bytes major, ls-2-bytes minor), 4byte inode number + * NOTE: we cannot use the kdev_t device id value, because kdev_t.h + * says we mustn't. We must break it up and reassemble. + * 1 - 4 byte user specified identifier + * 2 - 4 byte major, 4 byte minor, 4 byte inode number - DEPRECATED + * 3 - 4 byte device id, encoded for user-space, 4 byte inode number + * 4 - 4 byte inode number and 4 byte uuid + * 5 - 8 byte uuid + * 6 - 16 byte uuid + * 7 - 8 byte inode number and 16 byte uuid + * + * The fileid_type identifies how the file within the filesystem is encoded. + * The values for this field are filesystem specific, exccept that + * filesystems must not use the values '0' or '0xff'. 'See enum fid_type' + * in include/linux/exportfs.h for currently registered values. + */ + +struct knfsd_fh { + unsigned int fh_size; /* + * Points to the current size while + * building a new file handle. + */ + union { + char fh_raw[NFS4_FHSIZE]; + struct { + u8 fh_version; /* == 1 */ + u8 fh_auth_type; /* deprecated */ + u8 fh_fsid_type; + u8 fh_fileid_type; + u32 fh_fsid[]; /* flexible-array member */ + }; + }; +}; static inline __u32 ino_t_to_u32(ino_t ino) { @@ -33,14 +81,18 @@ typedef struct svc_fh { struct dentry * fh_dentry; /* validated dentry */ struct svc_export * fh_export; /* export pointer */ - bool fh_locked; /* inode locked by us */ bool fh_want_write; /* remount protection taken */ + bool fh_no_wcc; /* no wcc data needed */ + bool fh_no_atomic_attr; + /* + * wcc data is not atomic with + * operation + */ int fh_flags; /* FH flags */ -#ifdef CONFIG_NFSD_V3 bool fh_post_saved; /* post-op attrs saved */ bool fh_pre_saved; /* pre-op attrs saved */ - /* Pre-op attributes saved during fh_lock */ + /* Pre-op attributes saved when inode is locked */ __u64 fh_pre_size; /* size before operation */ struct timespec64 fh_pre_mtime; /* mtime before oper */ struct timespec64 fh_pre_ctime; /* ctime before oper */ @@ -50,11 +102,9 @@ typedef struct svc_fh { */ u64 fh_pre_change; - /* Post-op attributes saved in fh_unlock */ + /* Post-op attributes saved in fh_fill_post_attrs() */ struct kstat fh_post_attr; /* full attrs after operation */ u64 fh_post_change; /* nfsv4 change; see above */ -#endif /* CONFIG_NFSD_V3 */ - } svc_fh; #define NFSD4_FH_FOREIGN (1<<0) #define SET_FH_FLAG(c, f) ((c)->fh_flags |= (f)) @@ -76,7 +126,7 @@ enum fsid_source { FSIDSOURCE_FSID, FSIDSOURCE_UUID, }; -extern enum fsid_source fsid_source(struct svc_fh *fhp); +extern enum fsid_source fsid_source(const struct svc_fh *fhp); /* @@ -172,8 +222,8 @@ void fh_put(struct svc_fh *); static __inline__ struct svc_fh * fh_copy(struct svc_fh *dst, struct svc_fh *src) { - WARN_ON(src->fh_dentry || src->fh_locked); - + WARN_ON(src->fh_dentry); + *dst = *src; return dst; } @@ -182,7 +232,7 @@ static inline void fh_copy_shallow(struct knfsd_fh *dst, struct knfsd_fh *src) { dst->fh_size = src->fh_size; - memcpy(&dst->fh_base, &src->fh_base, src->fh_size); + memcpy(&dst->fh_raw, &src->fh_raw, src->fh_size); } static __inline__ struct svc_fh * @@ -197,7 +247,7 @@ static inline bool fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) { if (fh1->fh_size != fh2->fh_size) return false; - if (memcmp(fh1->fh_base.fh_pad, fh2->fh_base.fh_pad, fh1->fh_size) != 0) + if (memcmp(fh1->fh_raw, fh2->fh_raw, fh1->fh_size) != 0) return false; return true; } @@ -219,27 +269,23 @@ static inline bool fh_fsid_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) * returns a crc32 hash for the filehandle that is compatible with * the one displayed by "wireshark". */ - -static inline u32 -knfsd_fh_hash(struct knfsd_fh *fh) +static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh) { - return ~crc32_le(0xFFFFFFFF, (unsigned char *)&fh->fh_base, fh->fh_size); + return ~crc32_le(0xFFFFFFFF, fh->fh_raw, fh->fh_size); } #else -static inline u32 -knfsd_fh_hash(struct knfsd_fh *fh) +static inline u32 knfsd_fh_hash(const struct knfsd_fh *fh) { return 0; } #endif -#ifdef CONFIG_NFSD_V3 -/* - * The wcc data stored in current_fh should be cleared - * between compound ops. +/** + * fh_clear_pre_post_attrs - Reset pre/post attributes + * @fhp: file handle to be updated + * */ -static inline void -fh_clear_wcc(struct svc_fh *fhp) +static inline void fh_clear_pre_post_attrs(struct svc_fh *fhp) { fhp->fh_post_saved = false; fhp->fh_pre_saved = false; @@ -259,68 +305,21 @@ fh_clear_wcc(struct svc_fh *fhp) static inline u64 nfsd4_change_attribute(struct kstat *stat, struct inode *inode) { - u64 chattr; - - chattr = stat->ctime.tv_sec; - chattr <<= 30; - chattr += stat->ctime.tv_nsec; - chattr += inode_query_iversion(inode); - return chattr; -} - -extern void fill_pre_wcc(struct svc_fh *fhp); -extern void fill_post_wcc(struct svc_fh *fhp); -#else -#define fh_clear_wcc(ignored) -#define fill_pre_wcc(ignored) -#define fill_post_wcc(notused) -#endif /* CONFIG_NFSD_V3 */ - - -/* - * Lock a file handle/inode - * NOTE: both fh_lock and fh_unlock are done "by hand" in - * vfs.c:nfsd_rename as it needs to grab 2 i_mutex's at once - * so, any changes here should be reflected there. - */ - -static inline void -fh_lock_nested(struct svc_fh *fhp, unsigned int subclass) -{ - struct dentry *dentry = fhp->fh_dentry; - struct inode *inode; - - BUG_ON(!dentry); - - if (fhp->fh_locked) { - printk(KERN_WARNING "fh_lock: %pd2 already locked!\n", - dentry); - return; - } - - inode = d_inode(dentry); - inode_lock_nested(inode, subclass); - fill_pre_wcc(fhp); - fhp->fh_locked = true; -} - -static inline void -fh_lock(struct svc_fh *fhp) -{ - fh_lock_nested(fhp, I_MUTEX_NORMAL); -} - -/* - * Unlock a file handle/inode - */ -static inline void -fh_unlock(struct svc_fh *fhp) -{ - if (fhp->fh_locked) { - fill_post_wcc(fhp); - inode_unlock(d_inode(fhp->fh_dentry)); - fhp->fh_locked = false; - } + if (inode->i_sb->s_export_op->fetch_iversion) + return inode->i_sb->s_export_op->fetch_iversion(inode); + else if (IS_I_VERSION(inode)) { + u64 chattr; + + chattr = stat->ctime.tv_sec; + chattr <<= 30; + chattr += stat->ctime.tv_nsec; + chattr += inode_query_iversion(inode); + return chattr; + } else + return time_to_chattr(&stat->ctime); } +extern void fh_fill_pre_attrs(struct svc_fh *fhp); +extern void fh_fill_post_attrs(struct svc_fh *fhp); +extern void fh_fill_both_attrs(struct svc_fh *fhp); #endif /* _LINUX_NFSD_NFSFH_H */ diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 543bbe0a556e..82b3ddeacc33 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -11,30 +11,14 @@ #include "xdr.h" #include "vfs.h" -typedef struct svc_rqst svc_rqst; -typedef struct svc_buf svc_buf; - #define NFSDDBG_FACILITY NFSDDBG_PROC - static __be32 nfsd_proc_null(struct svc_rqst *rqstp) { - return nfs_ok; + return rpc_success; } -static __be32 -nfsd_return_attrs(__be32 err, struct nfsd_attrstat *resp) -{ - if (err) return err; - return fh_getattr(&resp->fh, &resp->stat); -} -static __be32 -nfsd_return_dirop(__be32 err, struct nfsd_diropres *resp) -{ - if (err) return err; - return fh_getattr(&resp->fh, &resp->stat); -} /* * Get a file's attributes * N.B. After this call resp->fh needs an fh_put @@ -44,13 +28,17 @@ nfsd_proc_getattr(struct svc_rqst *rqstp) { struct nfsd_fhandle *argp = rqstp->rq_argp; struct nfsd_attrstat *resp = rqstp->rq_resp; - __be32 nfserr; + dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh)); fh_copy(&resp->fh, &argp->fh); - nfserr = fh_verify(rqstp, &resp->fh, 0, - NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT); - return nfsd_return_attrs(nfserr, resp); + resp->status = fh_verify(rqstp, &resp->fh, 0, + NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT); + if (resp->status != nfs_ok) + goto out; + resp->status = fh_getattr(&resp->fh, &resp->stat); +out: + return rpc_success; } /* @@ -63,8 +51,10 @@ nfsd_proc_setattr(struct svc_rqst *rqstp) struct nfsd_sattrargs *argp = rqstp->rq_argp; struct nfsd_attrstat *resp = rqstp->rq_resp; struct iattr *iap = &argp->attrs; + struct nfsd_attrs attrs = { + .na_iattr = iap, + }; struct svc_fh *fhp; - __be32 nfserr; dprintk("nfsd: SETATTR %s, valid=%x, size=%ld\n", SVCFH_fmt(&argp->fh), @@ -96,14 +86,14 @@ nfsd_proc_setattr(struct svc_rqst *rqstp) */ time64_t delta = iap->ia_atime.tv_sec - ktime_get_real_seconds(); - nfserr = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP); - if (nfserr) - goto done; + resp->status = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP); + if (resp->status != nfs_ok) + goto out; if (delta < 0) delta = -delta; if (delta < MAX_TOUCH_TIME_ERROR && - setattr_prepare(fhp->fh_dentry, iap) != 0) { + setattr_prepare(&init_user_ns, fhp->fh_dentry, iap) != 0) { /* * Turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME. * This will cause notify_change to set these times @@ -113,9 +103,20 @@ nfsd_proc_setattr(struct svc_rqst *rqstp) } } - nfserr = nfsd_setattr(rqstp, fhp, iap, 0, (time64_t)0); -done: - return nfsd_return_attrs(nfserr, resp); + resp->status = nfsd_setattr(rqstp, fhp, &attrs, 0, (time64_t)0); + if (resp->status != nfs_ok) + goto out; + + resp->status = fh_getattr(&resp->fh, &resp->stat); +out: + return rpc_success; +} + +/* Obsolete, replaced by MNTPROC_MNT. */ +static __be32 +nfsd_proc_root(struct svc_rqst *rqstp) +{ + return rpc_success; } /* @@ -129,17 +130,20 @@ nfsd_proc_lookup(struct svc_rqst *rqstp) { struct nfsd_diropargs *argp = rqstp->rq_argp; struct nfsd_diropres *resp = rqstp->rq_resp; - __be32 nfserr; dprintk("nfsd: LOOKUP %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name); fh_init(&resp->fh, NFS_FHSIZE); - nfserr = nfsd_lookup(rqstp, &argp->fh, argp->name, argp->len, - &resp->fh); - + resp->status = nfsd_lookup(rqstp, &argp->fh, argp->name, argp->len, + &resp->fh); fh_put(&argp->fh); - return nfsd_return_dirop(nfserr, resp); + if (resp->status != nfs_ok) + goto out; + + resp->status = fh_getattr(&resp->fh, &resp->stat); +out: + return rpc_success; } /* @@ -148,18 +152,19 @@ nfsd_proc_lookup(struct svc_rqst *rqstp) static __be32 nfsd_proc_readlink(struct svc_rqst *rqstp) { - struct nfsd_readlinkargs *argp = rqstp->rq_argp; + struct nfsd_fhandle *argp = rqstp->rq_argp; struct nfsd_readlinkres *resp = rqstp->rq_resp; - __be32 nfserr; dprintk("nfsd: READLINK %s\n", SVCFH_fmt(&argp->fh)); /* Read the symlink. */ resp->len = NFS_MAXPATHLEN; - nfserr = nfsd_readlink(rqstp, &argp->fh, argp->buffer, &resp->len); + resp->page = *(rqstp->rq_next_page++); + resp->status = nfsd_readlink(rqstp, &argp->fh, + page_address(resp->page), &resp->len); fh_put(&argp->fh); - return nfserr; + return rpc_success; } /* @@ -171,36 +176,50 @@ nfsd_proc_read(struct svc_rqst *rqstp) { struct nfsd_readargs *argp = rqstp->rq_argp; struct nfsd_readres *resp = rqstp->rq_resp; - __be32 nfserr; + unsigned int len; u32 eof; + int v; dprintk("nfsd: READ %s %d bytes at %d\n", SVCFH_fmt(&argp->fh), argp->count, argp->offset); + argp->count = min_t(u32, argp->count, NFSSVC_MAXBLKSIZE_V2); + argp->count = min_t(u32, argp->count, rqstp->rq_res.buflen); + + v = 0; + len = argp->count; + resp->pages = rqstp->rq_next_page; + while (len > 0) { + struct page *page = *(rqstp->rq_next_page++); + + rqstp->rq_vec[v].iov_base = page_address(page); + rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE); + len -= rqstp->rq_vec[v].iov_len; + v++; + } + /* Obtain buffer pointer for payload. 19 is 1 word for * status, 17 words for fattr, and 1 word for the byte count. */ - - if (NFSSVC_MAXBLKSIZE_V2 < argp->count) { - char buf[RPC_MAX_ADDRBUFLEN]; - printk(KERN_NOTICE - "oversized read request from %s (%d bytes)\n", - svc_print_addr(rqstp, buf, sizeof(buf)), - argp->count); - argp->count = NFSSVC_MAXBLKSIZE_V2; - } svc_reserve_auth(rqstp, (19<<2) + argp->count + 4); resp->count = argp->count; - nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh), - argp->offset, - rqstp->rq_vec, argp->vlen, - &resp->count, - &eof); - - if (nfserr) return nfserr; - return fh_getattr(&resp->fh, &resp->stat); + fh_copy(&resp->fh, &argp->fh); + resp->status = nfsd_read(rqstp, &resp->fh, argp->offset, + rqstp->rq_vec, v, &resp->count, &eof); + if (resp->status == nfs_ok) + resp->status = fh_getattr(&resp->fh, &resp->stat); + else if (resp->status == nfserr_jukebox) + return rpc_drop_reply; + return rpc_success; +} + +/* Reserved */ +static __be32 +nfsd_proc_writecache(struct svc_rqst *rqstp) +{ + return rpc_success; } /* @@ -212,22 +231,23 @@ nfsd_proc_write(struct svc_rqst *rqstp) { struct nfsd_writeargs *argp = rqstp->rq_argp; struct nfsd_attrstat *resp = rqstp->rq_resp; - __be32 nfserr; unsigned long cnt = argp->len; unsigned int nvecs; - dprintk("nfsd: WRITE %s %d bytes at %d\n", + dprintk("nfsd: WRITE %s %u bytes at %d\n", SVCFH_fmt(&argp->fh), argp->len, argp->offset); - nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages, - &argp->first, cnt); - if (!nvecs) - return nfserr_io; - nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), - argp->offset, rqstp->rq_vec, nvecs, - &cnt, NFS_DATA_SYNC, NULL); - return nfsd_return_attrs(nfserr, resp); + nvecs = svc_fill_write_vector(rqstp, &argp->payload); + + resp->status = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), + argp->offset, rqstp->rq_vec, nvecs, + &cnt, NFS_DATA_SYNC, NULL); + if (resp->status == nfs_ok) + resp->status = fh_getattr(&resp->fh, &resp->stat); + else if (resp->status == nfserr_jukebox) + return rpc_drop_reply; + return rpc_success; } /* @@ -244,10 +264,12 @@ nfsd_proc_create(struct svc_rqst *rqstp) svc_fh *dirfhp = &argp->fh; svc_fh *newfhp = &resp->fh; struct iattr *attr = &argp->attrs; + struct nfsd_attrs attrs = { + .na_iattr = attr, + }; struct inode *inode; struct dentry *dchild; int type, mode; - __be32 nfserr; int hosterr; dev_t rdev = 0, wanted = new_decode_dev(attr->ia_size); @@ -255,40 +277,40 @@ nfsd_proc_create(struct svc_rqst *rqstp) SVCFH_fmt(dirfhp), argp->len, argp->name); /* First verify the parent file handle */ - nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, NFSD_MAY_EXEC); - if (nfserr) + resp->status = fh_verify(rqstp, dirfhp, S_IFDIR, NFSD_MAY_EXEC); + if (resp->status != nfs_ok) goto done; /* must fh_put dirfhp even on error */ /* Check for NFSD_MAY_WRITE in nfsd_create if necessary */ - nfserr = nfserr_exist; + resp->status = nfserr_exist; if (isdotent(argp->name, argp->len)) goto done; hosterr = fh_want_write(dirfhp); if (hosterr) { - nfserr = nfserrno(hosterr); + resp->status = nfserrno(hosterr); goto done; } - fh_lock_nested(dirfhp, I_MUTEX_PARENT); + inode_lock_nested(dirfhp->fh_dentry->d_inode, I_MUTEX_PARENT); dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len); if (IS_ERR(dchild)) { - nfserr = nfserrno(PTR_ERR(dchild)); + resp->status = nfserrno(PTR_ERR(dchild)); goto out_unlock; } fh_init(newfhp, NFS_FHSIZE); - nfserr = fh_compose(newfhp, dirfhp->fh_export, dchild, dirfhp); - if (!nfserr && d_really_is_negative(dchild)) - nfserr = nfserr_noent; + resp->status = fh_compose(newfhp, dirfhp->fh_export, dchild, dirfhp); + if (!resp->status && d_really_is_negative(dchild)) + resp->status = nfserr_noent; dput(dchild); - if (nfserr) { - if (nfserr != nfserr_noent) + if (resp->status) { + if (resp->status != nfserr_noent) goto out_unlock; /* * If the new file handle wasn't verified, we can't tell * whether the file exists or not. Time to bail ... */ - nfserr = nfserr_acces; + resp->status = nfserr_acces; if (!newfhp->fh_dentry) { printk(KERN_WARNING "nfsd_proc_create: file handle not verified\n"); @@ -314,18 +336,18 @@ nfsd_proc_create(struct svc_rqst *rqstp) rdev = inode->i_rdev; attr->ia_valid |= ATTR_SIZE; - /* FALLTHROUGH */ + fallthrough; case S_IFIFO: /* this is probably a permission check.. * at least IRIX implements perm checking on * echo thing > device-special-file-or-pipe * by doing a CREATE with type==0 */ - nfserr = nfsd_permission(rqstp, + resp->status = nfsd_permission(rqstp, newfhp->fh_export, newfhp->fh_dentry, NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS); - if (nfserr && nfserr != nfserr_rofs) + if (resp->status && resp->status != nfserr_rofs) goto out_unlock; } } else @@ -361,16 +383,16 @@ nfsd_proc_create(struct svc_rqst *rqstp) attr->ia_valid &= ~ATTR_SIZE; /* Make sure the type and device matches */ - nfserr = nfserr_exist; - if (inode && type != (inode->i_mode & S_IFMT)) + resp->status = nfserr_exist; + if (inode && inode_wrong_type(inode, type)) goto out_unlock; } - nfserr = 0; + resp->status = nfs_ok; if (!inode) { /* File doesn't exist. Create it and set attrs */ - nfserr = nfsd_create_locked(rqstp, dirfhp, argp->name, - argp->len, attr, type, rdev, newfhp); + resp->status = nfsd_create_locked(rqstp, dirfhp, &attrs, type, + rdev, newfhp); } else if (type == S_IFREG) { dprintk("nfsd: existing %s, valid=%x, size=%ld\n", argp->name, attr->ia_valid, (long) attr->ia_size); @@ -380,56 +402,61 @@ nfsd_proc_create(struct svc_rqst *rqstp) */ attr->ia_valid &= ATTR_SIZE; if (attr->ia_valid) - nfserr = nfsd_setattr(rqstp, newfhp, attr, 0, (time64_t)0); + resp->status = nfsd_setattr(rqstp, newfhp, &attrs, 0, + (time64_t)0); } out_unlock: - /* We don't really need to unlock, as fh_put does it. */ - fh_unlock(dirfhp); + inode_unlock(dirfhp->fh_dentry->d_inode); fh_drop_write(dirfhp); done: fh_put(dirfhp); - return nfsd_return_dirop(nfserr, resp); + if (resp->status != nfs_ok) + goto out; + resp->status = fh_getattr(&resp->fh, &resp->stat); +out: + return rpc_success; } static __be32 nfsd_proc_remove(struct svc_rqst *rqstp) { struct nfsd_diropargs *argp = rqstp->rq_argp; - __be32 nfserr; + struct nfsd_stat *resp = rqstp->rq_resp; dprintk("nfsd: REMOVE %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name); /* Unlink. -SIFDIR means file must not be a directory */ - nfserr = nfsd_unlink(rqstp, &argp->fh, -S_IFDIR, argp->name, argp->len); + resp->status = nfsd_unlink(rqstp, &argp->fh, -S_IFDIR, + argp->name, argp->len); fh_put(&argp->fh); - return nfserr; + return rpc_success; } static __be32 nfsd_proc_rename(struct svc_rqst *rqstp) { struct nfsd_renameargs *argp = rqstp->rq_argp; - __be32 nfserr; + struct nfsd_stat *resp = rqstp->rq_resp; dprintk("nfsd: RENAME %s %.*s -> \n", SVCFH_fmt(&argp->ffh), argp->flen, argp->fname); dprintk("nfsd: -> %s %.*s\n", SVCFH_fmt(&argp->tfh), argp->tlen, argp->tname); - nfserr = nfsd_rename(rqstp, &argp->ffh, argp->fname, argp->flen, - &argp->tfh, argp->tname, argp->tlen); + resp->status = nfsd_rename(rqstp, &argp->ffh, argp->fname, argp->flen, + &argp->tfh, argp->tname, argp->tlen); fh_put(&argp->ffh); fh_put(&argp->tfh); - return nfserr; + return rpc_success; } static __be32 nfsd_proc_link(struct svc_rqst *rqstp) { struct nfsd_linkargs *argp = rqstp->rq_argp; - __be32 nfserr; + struct nfsd_stat *resp = rqstp->rq_resp; dprintk("nfsd: LINK %s ->\n", SVCFH_fmt(&argp->ffh)); @@ -438,41 +465,49 @@ nfsd_proc_link(struct svc_rqst *rqstp) argp->tlen, argp->tname); - nfserr = nfsd_link(rqstp, &argp->tfh, argp->tname, argp->tlen, - &argp->ffh); + resp->status = nfsd_link(rqstp, &argp->tfh, argp->tname, argp->tlen, + &argp->ffh); fh_put(&argp->ffh); fh_put(&argp->tfh); - return nfserr; + return rpc_success; } static __be32 nfsd_proc_symlink(struct svc_rqst *rqstp) { struct nfsd_symlinkargs *argp = rqstp->rq_argp; + struct nfsd_stat *resp = rqstp->rq_resp; + struct nfsd_attrs attrs = { + .na_iattr = &argp->attrs, + }; struct svc_fh newfh; - __be32 nfserr; - if (argp->tlen > NFS_MAXPATHLEN) - return nfserr_nametoolong; + if (argp->tlen > NFS_MAXPATHLEN) { + resp->status = nfserr_nametoolong; + goto out; + } argp->tname = svc_fill_symlink_pathname(rqstp, &argp->first, page_address(rqstp->rq_arg.pages[0]), argp->tlen); - if (IS_ERR(argp->tname)) - return nfserrno(PTR_ERR(argp->tname)); + if (IS_ERR(argp->tname)) { + resp->status = nfserrno(PTR_ERR(argp->tname)); + goto out; + } dprintk("nfsd: SYMLINK %s %.*s -> %.*s\n", SVCFH_fmt(&argp->ffh), argp->flen, argp->fname, argp->tlen, argp->tname); fh_init(&newfh, NFS_FHSIZE); - nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen, - argp->tname, &newfh); + resp->status = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen, + argp->tname, &attrs, &newfh); kfree(argp->tname); fh_put(&argp->ffh); fh_put(&newfh); - return nfserr; +out: + return rpc_success; } /* @@ -484,7 +519,9 @@ nfsd_proc_mkdir(struct svc_rqst *rqstp) { struct nfsd_createargs *argp = rqstp->rq_argp; struct nfsd_diropres *resp = rqstp->rq_resp; - __be32 nfserr; + struct nfsd_attrs attrs = { + .na_iattr = &argp->attrs, + }; dprintk("nfsd: MKDIR %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name); @@ -495,10 +532,15 @@ nfsd_proc_mkdir(struct svc_rqst *rqstp) argp->attrs.ia_valid &= ~ATTR_SIZE; fh_init(&resp->fh, NFS_FHSIZE); - nfserr = nfsd_create(rqstp, &argp->fh, argp->name, argp->len, - &argp->attrs, S_IFDIR, 0, &resp->fh); + resp->status = nfsd_create(rqstp, &argp->fh, argp->name, argp->len, + &attrs, S_IFDIR, 0, &resp->fh); fh_put(&argp->fh); - return nfsd_return_dirop(nfserr, resp); + if (resp->status != nfs_ok) + goto out; + + resp->status = fh_getattr(&resp->fh, &resp->stat); +out: + return rpc_success; } /* @@ -508,13 +550,32 @@ static __be32 nfsd_proc_rmdir(struct svc_rqst *rqstp) { struct nfsd_diropargs *argp = rqstp->rq_argp; - __be32 nfserr; + struct nfsd_stat *resp = rqstp->rq_resp; dprintk("nfsd: RMDIR %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name); - nfserr = nfsd_unlink(rqstp, &argp->fh, S_IFDIR, argp->name, argp->len); + resp->status = nfsd_unlink(rqstp, &argp->fh, S_IFDIR, + argp->name, argp->len); fh_put(&argp->fh); - return nfserr; + return rpc_success; +} + +static void nfsd_init_dirlist_pages(struct svc_rqst *rqstp, + struct nfsd_readdirres *resp, + u32 count) +{ + struct xdr_buf *buf = &resp->dirlist; + struct xdr_stream *xdr = &resp->xdr; + + memset(buf, 0, sizeof(*buf)); + + /* Reserve room for the NULL ptr & eof flag (-2 words) */ + buf->buflen = clamp(count, (u32)(XDR_UNIT * 2), (u32)PAGE_SIZE); + buf->buflen -= XDR_UNIT * 2; + buf->pages = rqstp->rq_next_page; + rqstp->rq_next_page++; + + xdr_init_encode_pages(xdr, buf, buf->pages, NULL); } /* @@ -525,37 +586,23 @@ nfsd_proc_readdir(struct svc_rqst *rqstp) { struct nfsd_readdirargs *argp = rqstp->rq_argp; struct nfsd_readdirres *resp = rqstp->rq_resp; - int count; - __be32 nfserr; loff_t offset; dprintk("nfsd: READDIR %s %d bytes at %d\n", SVCFH_fmt(&argp->fh), argp->count, argp->cookie); - /* Shrink to the client read size */ - count = (argp->count >> 2) - 2; + nfsd_init_dirlist_pages(rqstp, resp, argp->count); - /* Make sure we've room for the NULL ptr & eof flag */ - count -= 2; - if (count < 0) - count = 0; - - resp->buffer = argp->buffer; - resp->offset = NULL; - resp->buflen = count; resp->common.err = nfs_ok; - /* Read directory and encode entries on the fly */ + resp->cookie_offset = 0; offset = argp->cookie; - nfserr = nfsd_readdir(rqstp, &argp->fh, &offset, - &resp->common, nfssvc_encode_entry); - - resp->count = resp->buffer - argp->buffer; - if (resp->offset) - *resp->offset = htonl(offset); + resp->status = nfsd_readdir(rqstp, &argp->fh, &offset, + &resp->common, nfssvc_encode_entry); + nfssvc_encode_nfscookie(resp, offset); fh_put(&argp->fh); - return nfserr; + return rpc_success; } /* @@ -566,21 +613,19 @@ nfsd_proc_statfs(struct svc_rqst *rqstp) { struct nfsd_fhandle *argp = rqstp->rq_argp; struct nfsd_statfsres *resp = rqstp->rq_resp; - __be32 nfserr; dprintk("nfsd: STATFS %s\n", SVCFH_fmt(&argp->fh)); - nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats, - NFSD_MAY_BYPASS_GSS_ON_ROOT); + resp->status = nfsd_statfs(rqstp, &argp->fh, &resp->stats, + NFSD_MAY_BYPASS_GSS_ON_ROOT); fh_put(&argp->fh); - return nfserr; + return rpc_success; } /* * NFSv2 Server procedures. * Only the results of non-idempotent operations are cached. */ -struct nfsd_void { int dummy; }; #define ST 1 /* status */ #define FH 8 /* filehandle */ @@ -589,169 +634,207 @@ struct nfsd_void { int dummy; }; static const struct svc_procedure nfsd_procedures2[18] = { [NFSPROC_NULL] = { .pc_func = nfsd_proc_null, - .pc_decode = nfssvc_decode_void, - .pc_encode = nfssvc_encode_void, - .pc_argsize = sizeof(struct nfsd_void), - .pc_ressize = sizeof(struct nfsd_void), + .pc_decode = nfssvc_decode_voidarg, + .pc_encode = nfssvc_encode_voidres, + .pc_argsize = sizeof(struct nfsd_voidargs), + .pc_argzero = sizeof(struct nfsd_voidargs), + .pc_ressize = sizeof(struct nfsd_voidres), .pc_cachetype = RC_NOCACHE, - .pc_xdrressize = ST, + .pc_xdrressize = 0, + .pc_name = "NULL", }, [NFSPROC_GETATTR] = { .pc_func = nfsd_proc_getattr, - .pc_decode = nfssvc_decode_fhandle, - .pc_encode = nfssvc_encode_attrstat, - .pc_release = nfssvc_release_fhandle, + .pc_decode = nfssvc_decode_fhandleargs, + .pc_encode = nfssvc_encode_attrstatres, + .pc_release = nfssvc_release_attrstat, .pc_argsize = sizeof(struct nfsd_fhandle), + .pc_argzero = sizeof(struct nfsd_fhandle), .pc_ressize = sizeof(struct nfsd_attrstat), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+AT, + .pc_name = "GETATTR", }, [NFSPROC_SETATTR] = { .pc_func = nfsd_proc_setattr, .pc_decode = nfssvc_decode_sattrargs, - .pc_encode = nfssvc_encode_attrstat, - .pc_release = nfssvc_release_fhandle, + .pc_encode = nfssvc_encode_attrstatres, + .pc_release = nfssvc_release_attrstat, .pc_argsize = sizeof(struct nfsd_sattrargs), + .pc_argzero = sizeof(struct nfsd_sattrargs), .pc_ressize = sizeof(struct nfsd_attrstat), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+AT, + .pc_name = "SETATTR", }, [NFSPROC_ROOT] = { - .pc_decode = nfssvc_decode_void, - .pc_encode = nfssvc_encode_void, - .pc_argsize = sizeof(struct nfsd_void), - .pc_ressize = sizeof(struct nfsd_void), + .pc_func = nfsd_proc_root, + .pc_decode = nfssvc_decode_voidarg, + .pc_encode = nfssvc_encode_voidres, + .pc_argsize = sizeof(struct nfsd_voidargs), + .pc_argzero = sizeof(struct nfsd_voidargs), + .pc_ressize = sizeof(struct nfsd_voidres), .pc_cachetype = RC_NOCACHE, - .pc_xdrressize = ST, + .pc_xdrressize = 0, + .pc_name = "ROOT", }, [NFSPROC_LOOKUP] = { .pc_func = nfsd_proc_lookup, .pc_decode = nfssvc_decode_diropargs, .pc_encode = nfssvc_encode_diropres, - .pc_release = nfssvc_release_fhandle, + .pc_release = nfssvc_release_diropres, .pc_argsize = sizeof(struct nfsd_diropargs), + .pc_argzero = sizeof(struct nfsd_diropargs), .pc_ressize = sizeof(struct nfsd_diropres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+FH+AT, + .pc_name = "LOOKUP", }, [NFSPROC_READLINK] = { .pc_func = nfsd_proc_readlink, - .pc_decode = nfssvc_decode_readlinkargs, + .pc_decode = nfssvc_decode_fhandleargs, .pc_encode = nfssvc_encode_readlinkres, - .pc_argsize = sizeof(struct nfsd_readlinkargs), + .pc_argsize = sizeof(struct nfsd_fhandle), + .pc_argzero = sizeof(struct nfsd_fhandle), .pc_ressize = sizeof(struct nfsd_readlinkres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+1+NFS_MAXPATHLEN/4, + .pc_name = "READLINK", }, [NFSPROC_READ] = { .pc_func = nfsd_proc_read, .pc_decode = nfssvc_decode_readargs, .pc_encode = nfssvc_encode_readres, - .pc_release = nfssvc_release_fhandle, + .pc_release = nfssvc_release_readres, .pc_argsize = sizeof(struct nfsd_readargs), + .pc_argzero = sizeof(struct nfsd_readargs), .pc_ressize = sizeof(struct nfsd_readres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4, + .pc_name = "READ", }, [NFSPROC_WRITECACHE] = { - .pc_decode = nfssvc_decode_void, - .pc_encode = nfssvc_encode_void, - .pc_argsize = sizeof(struct nfsd_void), - .pc_ressize = sizeof(struct nfsd_void), + .pc_func = nfsd_proc_writecache, + .pc_decode = nfssvc_decode_voidarg, + .pc_encode = nfssvc_encode_voidres, + .pc_argsize = sizeof(struct nfsd_voidargs), + .pc_argzero = sizeof(struct nfsd_voidargs), + .pc_ressize = sizeof(struct nfsd_voidres), .pc_cachetype = RC_NOCACHE, - .pc_xdrressize = ST, + .pc_xdrressize = 0, + .pc_name = "WRITECACHE", }, [NFSPROC_WRITE] = { .pc_func = nfsd_proc_write, .pc_decode = nfssvc_decode_writeargs, - .pc_encode = nfssvc_encode_attrstat, - .pc_release = nfssvc_release_fhandle, + .pc_encode = nfssvc_encode_attrstatres, + .pc_release = nfssvc_release_attrstat, .pc_argsize = sizeof(struct nfsd_writeargs), + .pc_argzero = sizeof(struct nfsd_writeargs), .pc_ressize = sizeof(struct nfsd_attrstat), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+AT, + .pc_name = "WRITE", }, [NFSPROC_CREATE] = { .pc_func = nfsd_proc_create, .pc_decode = nfssvc_decode_createargs, .pc_encode = nfssvc_encode_diropres, - .pc_release = nfssvc_release_fhandle, + .pc_release = nfssvc_release_diropres, .pc_argsize = sizeof(struct nfsd_createargs), + .pc_argzero = sizeof(struct nfsd_createargs), .pc_ressize = sizeof(struct nfsd_diropres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+FH+AT, + .pc_name = "CREATE", }, [NFSPROC_REMOVE] = { .pc_func = nfsd_proc_remove, .pc_decode = nfssvc_decode_diropargs, - .pc_encode = nfssvc_encode_void, + .pc_encode = nfssvc_encode_statres, .pc_argsize = sizeof(struct nfsd_diropargs), - .pc_ressize = sizeof(struct nfsd_void), + .pc_argzero = sizeof(struct nfsd_diropargs), + .pc_ressize = sizeof(struct nfsd_stat), .pc_cachetype = RC_REPLSTAT, .pc_xdrressize = ST, + .pc_name = "REMOVE", }, [NFSPROC_RENAME] = { .pc_func = nfsd_proc_rename, .pc_decode = nfssvc_decode_renameargs, - .pc_encode = nfssvc_encode_void, + .pc_encode = nfssvc_encode_statres, .pc_argsize = sizeof(struct nfsd_renameargs), - .pc_ressize = sizeof(struct nfsd_void), + .pc_argzero = sizeof(struct nfsd_renameargs), + .pc_ressize = sizeof(struct nfsd_stat), .pc_cachetype = RC_REPLSTAT, .pc_xdrressize = ST, + .pc_name = "RENAME", }, [NFSPROC_LINK] = { .pc_func = nfsd_proc_link, .pc_decode = nfssvc_decode_linkargs, - .pc_encode = nfssvc_encode_void, + .pc_encode = nfssvc_encode_statres, .pc_argsize = sizeof(struct nfsd_linkargs), - .pc_ressize = sizeof(struct nfsd_void), + .pc_argzero = sizeof(struct nfsd_linkargs), + .pc_ressize = sizeof(struct nfsd_stat), .pc_cachetype = RC_REPLSTAT, .pc_xdrressize = ST, + .pc_name = "LINK", }, [NFSPROC_SYMLINK] = { .pc_func = nfsd_proc_symlink, .pc_decode = nfssvc_decode_symlinkargs, - .pc_encode = nfssvc_encode_void, + .pc_encode = nfssvc_encode_statres, .pc_argsize = sizeof(struct nfsd_symlinkargs), - .pc_ressize = sizeof(struct nfsd_void), + .pc_argzero = sizeof(struct nfsd_symlinkargs), + .pc_ressize = sizeof(struct nfsd_stat), .pc_cachetype = RC_REPLSTAT, .pc_xdrressize = ST, + .pc_name = "SYMLINK", }, [NFSPROC_MKDIR] = { .pc_func = nfsd_proc_mkdir, .pc_decode = nfssvc_decode_createargs, .pc_encode = nfssvc_encode_diropres, - .pc_release = nfssvc_release_fhandle, + .pc_release = nfssvc_release_diropres, .pc_argsize = sizeof(struct nfsd_createargs), + .pc_argzero = sizeof(struct nfsd_createargs), .pc_ressize = sizeof(struct nfsd_diropres), .pc_cachetype = RC_REPLBUFF, .pc_xdrressize = ST+FH+AT, + .pc_name = "MKDIR", }, [NFSPROC_RMDIR] = { .pc_func = nfsd_proc_rmdir, .pc_decode = nfssvc_decode_diropargs, - .pc_encode = nfssvc_encode_void, + .pc_encode = nfssvc_encode_statres, .pc_argsize = sizeof(struct nfsd_diropargs), - .pc_ressize = sizeof(struct nfsd_void), + .pc_argzero = sizeof(struct nfsd_diropargs), + .pc_ressize = sizeof(struct nfsd_stat), .pc_cachetype = RC_REPLSTAT, .pc_xdrressize = ST, + .pc_name = "RMDIR", }, [NFSPROC_READDIR] = { .pc_func = nfsd_proc_readdir, .pc_decode = nfssvc_decode_readdirargs, .pc_encode = nfssvc_encode_readdirres, .pc_argsize = sizeof(struct nfsd_readdirargs), + .pc_argzero = sizeof(struct nfsd_readdirargs), .pc_ressize = sizeof(struct nfsd_readdirres), .pc_cachetype = RC_NOCACHE, + .pc_name = "READDIR", }, [NFSPROC_STATFS] = { .pc_func = nfsd_proc_statfs, - .pc_decode = nfssvc_decode_fhandle, + .pc_decode = nfssvc_decode_fhandleargs, .pc_encode = nfssvc_encode_statfsres, .pc_argsize = sizeof(struct nfsd_fhandle), + .pc_argzero = sizeof(struct nfsd_fhandle), .pc_ressize = sizeof(struct nfsd_statfsres), .pc_cachetype = RC_NOCACHE, .pc_xdrressize = ST+5, + .pc_name = "STATFS", }, }; @@ -782,6 +865,7 @@ nfserrno (int errno) { nfserr_io, -EIO }, { nfserr_nxio, -ENXIO }, { nfserr_fbig, -E2BIG }, + { nfserr_stale, -EBADF }, { nfserr_acces, -EACCES }, { nfserr_exist, -EEXIST }, { nfserr_xdev, -EXDEV }, @@ -810,8 +894,11 @@ nfserrno (int errno) { nfserr_toosmall, -ETOOSMALL }, { nfserr_serverfault, -ESERVERFAULT }, { nfserr_serverfault, -ENFILE }, + { nfserr_io, -EREMOTEIO }, + { nfserr_stale, -EOPENSTALE }, { nfserr_io, -EUCLEAN }, { nfserr_perm, -ENOKEY }, + { nfserr_no_grace, -ENOGRACE}, }; int i; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 3b77b904212d..bfbd9f672f59 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -12,6 +12,7 @@ #include <linux/module.h> #include <linux/fs_struct.h> #include <linux/swap.h> +#include <linux/siphash.h> #include <linux/sunrpc/stats.h> #include <linux/sunrpc/svcsock.h> @@ -29,13 +30,9 @@ #include "netns.h" #include "filecache.h" -#define NFSDDBG_FACILITY NFSDDBG_SVC +#include "trace.h" -bool inter_copy_offload_enable; -EXPORT_SYMBOL_GPL(inter_copy_offload_enable); -module_param(inter_copy_offload_enable, bool, 0644); -MODULE_PARM_DESC(inter_copy_offload_enable, - "Enable inter server to server copy offload. Default: false"); +#define NFSDDBG_FACILITY NFSDDBG_SVC extern struct svc_program nfsd_program; static int nfsd(void *vrqstp); @@ -59,18 +56,17 @@ static __be32 nfsd_init_request(struct svc_rqst *, struct svc_process_info *); /* - * nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and the members - * of the svc_serv struct. In particular, ->sv_nrthreads but also to some - * extent ->sv_temp_socks and ->sv_permsocks. It also protects nfsdstats.th_cnt + * nfsd_mutex protects nn->nfsd_serv -- both the pointer itself and some members + * of the svc_serv struct such as ->sv_temp_socks and ->sv_permsocks. * * If (out side the lock) nn->nfsd_serv is non-NULL, then it must point to a - * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0. That number - * of nfsd threads must exist and each must listed in ->sp_all_threads in each - * entry of ->sv_pools[]. + * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0 (unless + * nn->keep_active is set). That number of nfsd threads must + * exist and each must be listed in ->sp_all_threads in some entry of + * ->sv_pools[]. * - * Transitions of the thread count between zero and non-zero are of particular - * interest since the svc_serv needs to be created and initialized at that - * point, or freed. + * Each active thread holds a counted reference on nn->nfsd_serv, as does + * the nn->keep_active flag and various transient calls to svc_get(). * * Finally, the nfsd_mutex also protects some of the global variables that are * accessed when nfsd starts and that are settable via the write_* routines in @@ -88,7 +84,7 @@ DEFINE_MUTEX(nfsd_mutex); * version 4.1 DRC caches. * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage. */ -spinlock_t nfsd_drc_lock; +DEFINE_SPINLOCK(nfsd_drc_lock); unsigned long nfsd_drc_max_mem; unsigned long nfsd_drc_mem_used; @@ -121,9 +117,7 @@ static struct svc_stat nfsd_acl_svcstats = { static const struct svc_version *nfsd_version[] = { [2] = &nfsd_version2, -#if defined(CONFIG_NFSD_V3) [3] = &nfsd_version3, -#endif #if defined(CONFIG_NFSD_V4) [4] = &nfsd_version4, #endif @@ -221,7 +215,7 @@ int nfsd_vers(struct nfsd_net *nn, int vers, enum vers_op change) case NFSD_TEST: if (nn->nfsd_versions) return nn->nfsd_versions[vers]; - /* Fallthrough */ + fallthrough; case NFSD_AVAIL: return nfsd_support_version(vers); } @@ -297,13 +291,13 @@ static int nfsd_init_socks(struct net *net, const struct cred *cred) if (!list_empty(&nn->nfsd_serv->sv_permsocks)) return 0; - error = svc_create_xprt(nn->nfsd_serv, "udp", net, PF_INET, NFS_PORT, - SVC_SOCK_DEFAULTS, cred); + error = svc_xprt_create(nn->nfsd_serv, "udp", net, PF_INET, NFS_PORT, + SVC_SOCK_DEFAULTS, cred); if (error < 0) return error; - error = svc_create_xprt(nn->nfsd_serv, "tcp", net, PF_INET, NFS_PORT, - SVC_SOCK_DEFAULTS, cred); + error = svc_xprt_create(nn->nfsd_serv, "tcp", net, PF_INET, NFS_PORT, + SVC_SOCK_DEFAULTS, cred); if (error < 0) return error; @@ -312,7 +306,7 @@ static int nfsd_init_socks(struct net *net, const struct cred *cred) static int nfsd_users = 0; -static int nfsd_startup_generic(int nrservs) +static int nfsd_startup_generic(void) { int ret; @@ -349,36 +343,60 @@ static bool nfsd_needs_lockd(struct nfsd_net *nn) return nfsd_vers(nn, 2, NFSD_TEST) || nfsd_vers(nn, 3, NFSD_TEST); } -void nfsd_copy_boot_verifier(__be32 verf[2], struct nfsd_net *nn) +/** + * nfsd_copy_write_verifier - Atomically copy a write verifier + * @verf: buffer in which to receive the verifier cookie + * @nn: NFS net namespace + * + * This function provides a wait-free mechanism for copying the + * namespace's write verifier without tearing it. + */ +void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn) { int seq = 0; do { - read_seqbegin_or_lock(&nn->boot_lock, &seq); - /* - * This is opaque to client, so no need to byte-swap. Use - * __force to keep sparse happy. y2038 time_t overflow is - * irrelevant in this usage - */ - verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec; - verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec; - } while (need_seqretry(&nn->boot_lock, seq)); - done_seqretry(&nn->boot_lock, seq); + read_seqbegin_or_lock(&nn->writeverf_lock, &seq); + memcpy(verf, nn->writeverf, sizeof(*verf)); + } while (need_seqretry(&nn->writeverf_lock, seq)); + done_seqretry(&nn->writeverf_lock, seq); } -static void nfsd_reset_boot_verifier_locked(struct nfsd_net *nn) +static void nfsd_reset_write_verifier_locked(struct nfsd_net *nn) { - ktime_get_real_ts64(&nn->nfssvc_boot); + struct timespec64 now; + u64 verf; + + /* + * Because the time value is hashed, y2038 time_t overflow + * is irrelevant in this usage. + */ + ktime_get_raw_ts64(&now); + verf = siphash_2u64(now.tv_sec, now.tv_nsec, &nn->siphash_key); + memcpy(nn->writeverf, &verf, sizeof(nn->writeverf)); } -void nfsd_reset_boot_verifier(struct nfsd_net *nn) +/** + * nfsd_reset_write_verifier - Generate a new write verifier + * @nn: NFS net namespace + * + * This function updates the ->writeverf field of @nn. This field + * contains an opaque cookie that, according to Section 18.32.3 of + * RFC 8881, "the client can use to determine whether a server has + * changed instance state (e.g., server restart) between a call to + * WRITE and a subsequent call to either WRITE or COMMIT. This + * cookie MUST be unchanged during a single instance of the NFSv4.1 + * server and MUST be unique between instances of the NFSv4.1 + * server." + */ +void nfsd_reset_write_verifier(struct nfsd_net *nn) { - write_seqlock(&nn->boot_lock); - nfsd_reset_boot_verifier_locked(nn); - write_sequnlock(&nn->boot_lock); + write_seqlock(&nn->writeverf_lock); + nfsd_reset_write_verifier_locked(nn); + write_sequnlock(&nn->writeverf_lock); } -static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cred) +static int nfsd_startup_net(struct net *net, const struct cred *cred) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); int ret; @@ -386,7 +404,7 @@ static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cre if (nn->nfsd_net_up) return 0; - ret = nfsd_startup_generic(nrservs); + ret = nfsd_startup_generic(); if (ret) return ret; ret = nfsd_init_socks(net, cred); @@ -407,6 +425,9 @@ static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cre if (ret) goto out_filecache; +#ifdef CONFIG_NFSD_V4_2_INTER_SSC + nfsd4_ssc_init_umount_work(nn); +#endif nn->nfsd_net_up = true; return 0; @@ -436,6 +457,7 @@ static void nfsd_shutdown_net(struct net *net) nfsd_shutdown_generic(); } +static DEFINE_SPINLOCK(nfsd_notifier_lock); static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) { @@ -445,18 +467,17 @@ static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event, struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct sockaddr_in sin; - if ((event != NETDEV_DOWN) || - !atomic_inc_not_zero(&nn->ntf_refcnt)) + if (event != NETDEV_DOWN || !nn->nfsd_serv) goto out; + spin_lock(&nfsd_notifier_lock); if (nn->nfsd_serv) { dprintk("nfsd_inetaddr_event: removed %pI4\n", &ifa->ifa_local); sin.sin_family = AF_INET; sin.sin_addr.s_addr = ifa->ifa_local; svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin); } - atomic_dec(&nn->ntf_refcnt); - wake_up(&nn->ntf_wq); + spin_unlock(&nfsd_notifier_lock); out: return NOTIFY_DONE; @@ -476,10 +497,10 @@ static int nfsd_inet6addr_event(struct notifier_block *this, struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct sockaddr_in6 sin6; - if ((event != NETDEV_DOWN) || - !atomic_inc_not_zero(&nn->ntf_refcnt)) + if (event != NETDEV_DOWN || !nn->nfsd_serv) goto out; + spin_lock(&nfsd_notifier_lock); if (nn->nfsd_serv) { dprintk("nfsd_inet6addr_event: removed %pI6\n", &ifa->addr); sin6.sin6_family = AF_INET6; @@ -488,8 +509,8 @@ static int nfsd_inet6addr_event(struct notifier_block *this, sin6.sin6_scope_id = ifa->idev->dev->ifindex; svc_age_temp_xprts_now(nn->nfsd_serv, (struct sockaddr *)&sin6); } - atomic_dec(&nn->ntf_refcnt); - wake_up(&nn->ntf_wq); + spin_unlock(&nfsd_notifier_lock); + out: return NOTIFY_DONE; } @@ -506,7 +527,6 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); - atomic_dec(&nn->ntf_refcnt); /* check if the notifier still has clients */ if (atomic_dec_return(&nfsd_notifier_refcount) == 0) { unregister_inetaddr_notifier(&nfsd_inetaddr_notifier); @@ -514,7 +534,6 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net) unregister_inet6addr_notifier(&nfsd_inet6addr_notifier); #endif } - wait_event(nn->ntf_wq, atomic_read(&nn->ntf_refcnt) == 0); /* * write_ports can create the server without actually starting @@ -527,8 +546,7 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net) return; nfsd_shutdown_net(net); - printk(KERN_WARNING "nfsd: last server has exited, flushing export " - "cache\n"); + pr_info("nfsd: last server has exited, flushing export cache\n"); nfsd_export_flush(net); } @@ -568,7 +586,6 @@ static void set_max_drc(void) nfsd_drc_max_mem = (nr_free_buffer_pages() >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE; nfsd_drc_mem_used = 0; - spin_lock_init(&nfsd_drc_lock); dprintk("%s nfsd_drc_max_mem %lu \n", __func__, nfsd_drc_max_mem); } @@ -593,18 +610,35 @@ static int nfsd_get_default_max_blksize(void) return ret; } -static const struct svc_serv_ops nfsd_thread_sv_ops = { - .svo_shutdown = nfsd_last_thread, - .svo_function = nfsd, - .svo_enqueue_xprt = svc_xprt_do_enqueue, - .svo_setup = svc_set_num_threads, - .svo_module = THIS_MODULE, -}; +void nfsd_shutdown_threads(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct svc_serv *serv; + + mutex_lock(&nfsd_mutex); + serv = nn->nfsd_serv; + if (serv == NULL) { + mutex_unlock(&nfsd_mutex); + return; + } + + svc_get(serv); + /* Kill outstanding nfsd threads */ + svc_set_num_threads(serv, NULL, 0); + nfsd_put(net); + mutex_unlock(&nfsd_mutex); +} + +bool i_am_nfsd(void) +{ + return kthread_func(current) == nfsd; +} int nfsd_create_serv(struct net *net) { int error; struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct svc_serv *serv; WARN_ON(!mutex_is_locked(&nfsd_mutex)); if (nn->nfsd_serv) { @@ -614,17 +648,22 @@ int nfsd_create_serv(struct net *net) if (nfsd_max_blksize == 0) nfsd_max_blksize = nfsd_get_default_max_blksize(); nfsd_reset_versions(nn); - nn->nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, - &nfsd_thread_sv_ops); - if (nn->nfsd_serv == NULL) + serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, nfsd); + if (serv == NULL) return -ENOMEM; - nn->nfsd_serv->sv_maxconn = nn->max_connections; - error = svc_bind(nn->nfsd_serv, net); + serv->sv_maxconn = nn->max_connections; + error = svc_bind(serv, net); if (error < 0) { - svc_destroy(nn->nfsd_serv); + /* NOT nfsd_put() as notifiers (see below) haven't + * been set up yet. + */ + svc_put(serv); return error; } + spin_lock(&nfsd_notifier_lock); + nn->nfsd_serv = serv; + spin_unlock(&nfsd_notifier_lock); set_max_drc(); /* check if the notifier is already set */ @@ -634,8 +673,7 @@ int nfsd_create_serv(struct net *net) register_inet6addr_notifier(&nfsd_inet6addr_notifier); #endif } - atomic_inc(&nn->ntf_refcnt); - nfsd_reset_boot_verifier(nn); + nfsd_reset_write_verifier(nn); return 0; } @@ -662,16 +700,27 @@ int nfsd_get_nrthreads(int n, int *nthreads, struct net *net) return 0; } -void nfsd_destroy(struct net *net) +/* This is the callback for kref_put() below. + * There is no code here as the first thing to be done is + * call svc_shutdown_net(), but we cannot get the 'net' from + * the kref. So do all the work when kref_put returns true. + */ +static void nfsd_noop(struct kref *ref) +{ +} + +void nfsd_put(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); - int destroy = (nn->nfsd_serv->sv_nrthreads == 1); - if (destroy) - svc_shutdown_net(nn->nfsd_serv, net); - svc_destroy(nn->nfsd_serv); - if (destroy) + if (kref_put(&nn->nfsd_serv->sv_refcnt, nfsd_noop)) { + svc_xprt_destroy_all(nn->nfsd_serv, net); + nfsd_last_thread(nn->nfsd_serv, net); + svc_destroy(&nn->nfsd_serv->sv_refcnt); + spin_lock(&nfsd_notifier_lock); nn->nfsd_serv = NULL; + spin_unlock(&nfsd_notifier_lock); + } } int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) @@ -698,7 +747,7 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) if (tot > NFSD_MAXSERVS) { /* total too large: scale down requested numbers */ for (i = 0; i < n && tot > 0; i++) { - int new = nthreads[i] * NFSD_MAXSERVS / tot; + int new = nthreads[i] * NFSD_MAXSERVS / tot; tot -= (nthreads[i] - new); nthreads[i] = new; } @@ -718,12 +767,13 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) /* apply the new numbers */ svc_get(nn->nfsd_serv); for (i = 0; i < n; i++) { - err = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv, - &nn->nfsd_serv->sv_pools[i], nthreads[i]); + err = svc_set_num_threads(nn->nfsd_serv, + &nn->nfsd_serv->sv_pools[i], + nthreads[i]); if (err) break; } - nfsd_destroy(net); + nfsd_put(net); return err; } @@ -749,29 +799,30 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred) if (nrservs == 0 && nn->nfsd_serv == NULL) goto out; + strscpy(nn->nfsd_name, utsname()->nodename, + sizeof(nn->nfsd_name)); + error = nfsd_create_serv(net); if (error) goto out; nfsd_up_before = nn->nfsd_net_up; - error = nfsd_startup_net(nrservs, net, cred); + error = nfsd_startup_net(net, cred); if (error) - goto out_destroy; - error = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv, - NULL, nrservs); + goto out_put; + error = svc_set_num_threads(nn->nfsd_serv, NULL, nrservs); if (error) goto out_shutdown; - /* We are holding a reference to nn->nfsd_serv which - * we don't want to count in the return value, - * so subtract 1 - */ - error = nn->nfsd_serv->sv_nrthreads - 1; + error = nn->nfsd_serv->sv_nrthreads; out_shutdown: if (error < 0 && !nfsd_up_before) nfsd_shutdown_net(net); -out_destroy: - nfsd_destroy(net); /* Release server */ +out_put: + /* Threads now hold service active */ + if (xchg(&nn->keep_active, 0)) + nfsd_put(net); + nfsd_put(net); out: mutex_unlock(&nfsd_mutex); return error; @@ -885,9 +936,6 @@ nfsd(void *vrqstp) struct nfsd_net *nn = net_generic(net, nfsd_net_id); int err; - /* Lock module and set up kernel thread */ - mutex_lock(&nfsd_mutex); - /* At this point, the thread shares current->fs * with the init process. We need to create files with the * umask as defined by the client instead of init's umask. */ @@ -907,8 +955,7 @@ nfsd(void *vrqstp) allow_signal(SIGINT); allow_signal(SIGQUIT); - nfsdstats.th_cnt++; - mutex_unlock(&nfsd_mutex); + atomic_inc(&nfsdstats.th_cnt); set_freezable(); @@ -935,139 +982,133 @@ nfsd(void *vrqstp) /* Clear signals before calling svc_exit_thread() */ flush_signals(current); - mutex_lock(&nfsd_mutex); - nfsdstats.th_cnt --; + atomic_dec(&nfsdstats.th_cnt); out: - rqstp->rq_server = NULL; + /* Take an extra ref so that the svc_put in svc_exit_thread() + * doesn't call svc_destroy() + */ + svc_get(nn->nfsd_serv); /* Release the thread */ svc_exit_thread(rqstp); - nfsd_destroy(net); + /* We need to drop a ref, but may not drop the last reference + * without holding nfsd_mutex, and we cannot wait for nfsd_mutex as that + * could deadlock with nfsd_shutdown_threads() waiting for us. + * So three options are: + * - drop a non-final reference, + * - get the mutex without waiting + * - sleep briefly andd try the above again + */ + while (!svc_put_not_last(nn->nfsd_serv)) { + if (mutex_trylock(&nfsd_mutex)) { + nfsd_put(net); + mutex_unlock(&nfsd_mutex); + break; + } + msleep(20); + } - /* Release module */ - mutex_unlock(&nfsd_mutex); - module_put_and_exit(0); return 0; } -static __be32 map_new_errors(u32 vers, __be32 nfserr) -{ - if (nfserr == nfserr_jukebox && vers == 2) - return nfserr_dropit; - if (nfserr == nfserr_wrongsec && vers < 4) - return nfserr_acces; - return nfserr; -} - -/* - * A write procedure can have a large argument, and a read procedure can - * have a large reply, but no NFSv2 or NFSv3 procedure has argument and - * reply that can both be larger than a page. The xdr code has taken - * advantage of this assumption to be a sloppy about bounds checking in - * some cases. Pending a rewrite of the NFSv2/v3 xdr code to fix that - * problem, we enforce these assumptions here: +/** + * nfsd_dispatch - Process an NFS or NFSACL Request + * @rqstp: incoming request + * @statp: pointer to location of accept_stat field in RPC Reply buffer + * + * This RPC dispatcher integrates the NFS server's duplicate reply cache. + * + * Return values: + * %0: Processing complete; do not send a Reply + * %1: Processing complete; send Reply in rqstp->rq_res */ -static bool nfs_request_too_big(struct svc_rqst *rqstp, - const struct svc_procedure *proc) +int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) { - /* - * The ACL code has more careful bounds-checking and is not - * susceptible to this problem: - */ - if (rqstp->rq_prog != NFS_PROGRAM) - return false; - /* - * Ditto NFSv4 (which can in theory have argument and reply both - * more than a page): - */ - if (rqstp->rq_vers >= 4) - return false; - /* The reply will be small, we're OK: */ - if (proc->pc_xdrressize > 0 && - proc->pc_xdrressize < XDR_QUADLEN(PAGE_SIZE)) - return false; - - return rqstp->rq_arg.len > PAGE_SIZE; -} + const struct svc_procedure *proc = rqstp->rq_procinfo; -int -nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) -{ - const struct svc_procedure *proc; - __be32 nfserr; - __be32 *nfserrp; - - dprintk("nfsd_dispatch: vers %d proc %d\n", - rqstp->rq_vers, rqstp->rq_proc); - proc = rqstp->rq_procinfo; - - if (nfs_request_too_big(rqstp, proc)) { - dprintk("nfsd: NFSv%d argument too large\n", rqstp->rq_vers); - *statp = rpc_garbage_args; - return 1; - } /* * Give the xdr decoder a chance to change this if it wants * (necessary in the NFSv4.0 compound case) */ rqstp->rq_cachetype = proc->pc_cachetype; - /* Decode arguments */ - if (proc->pc_decode && - !proc->pc_decode(rqstp, (__be32*)rqstp->rq_arg.head[0].iov_base)) { - dprintk("nfsd: failed to decode arguments!\n"); - *statp = rpc_garbage_args; - return 1; - } - /* Check whether we have this call in the cache. */ + svcxdr_init_decode(rqstp); + if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream)) + goto out_decode_err; + switch (nfsd_cache_lookup(rqstp)) { - case RC_DROPIT: - return 0; + case RC_DOIT: + break; case RC_REPLY: - return 1; - case RC_DOIT:; - /* do it */ + goto out_cached_reply; + case RC_DROPIT: + goto out_dropit; } - /* need to grab the location to store the status, as - * nfsv4 does some encoding while processing + /* + * Need to grab the location to store the status, as + * NFSv4 does some encoding while processing */ - nfserrp = rqstp->rq_res.head[0].iov_base - + rqstp->rq_res.head[0].iov_len; - rqstp->rq_res.head[0].iov_len += sizeof(__be32); - - /* Now call the procedure handler, and encode NFS status. */ - nfserr = proc->pc_func(rqstp); - nfserr = map_new_errors(rqstp->rq_vers, nfserr); - if (nfserr == nfserr_dropit || test_bit(RQ_DROPME, &rqstp->rq_flags)) { - dprintk("nfsd: Dropping request; may be revisited later\n"); - nfsd_cache_update(rqstp, RC_NOCACHE, NULL); - return 0; - } + svcxdr_init_encode(rqstp); - if (rqstp->rq_proc != 0) - *nfserrp++ = nfserr; + *statp = proc->pc_func(rqstp); + if (*statp == rpc_drop_reply || test_bit(RQ_DROPME, &rqstp->rq_flags)) + goto out_update_drop; - /* Encode result. - * For NFSv2, additional info is never returned in case of an error. - */ - if (!(nfserr && rqstp->rq_vers == 2)) { - if (proc->pc_encode && !proc->pc_encode(rqstp, nfserrp)) { - /* Failed to encode result. Release cache entry */ - dprintk("nfsd: failed to encode result!\n"); - nfsd_cache_update(rqstp, RC_NOCACHE, NULL); - *statp = rpc_system_err; - return 1; - } - } + if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream)) + goto out_encode_err; - /* Store reply in cache. */ nfsd_cache_update(rqstp, rqstp->rq_cachetype, statp + 1); +out_cached_reply: + return 1; + +out_decode_err: + trace_nfsd_garbage_args_err(rqstp); + *statp = rpc_garbage_args; + return 1; + +out_update_drop: + nfsd_cache_update(rqstp, RC_NOCACHE, NULL); +out_dropit: + return 0; + +out_encode_err: + trace_nfsd_cant_encode_err(rqstp); + nfsd_cache_update(rqstp, RC_NOCACHE, NULL); + *statp = rpc_system_err; return 1; } +/** + * nfssvc_decode_voidarg - Decode void arguments + * @rqstp: Server RPC transaction context + * @xdr: XDR stream positioned at arguments to decode + * + * Return values: + * %false: Arguments were not valid + * %true: Decoding was successful + */ +bool nfssvc_decode_voidarg(struct svc_rqst *rqstp, struct xdr_stream *xdr) +{ + return true; +} + +/** + * nfssvc_encode_voidres - Encode void results + * @rqstp: Server RPC transaction context + * @xdr: XDR stream into which to encode results + * + * Return values: + * %false: Local error while encoding + * %true: Encoding was successful + */ +bool nfssvc_encode_voidres(struct svc_rqst *rqstp, struct xdr_stream *xdr) +{ + return true; +} + int nfsd_pool_stats_open(struct inode *inode, struct file *file) { int ret; @@ -1078,7 +1119,6 @@ int nfsd_pool_stats_open(struct inode *inode, struct file *file) mutex_unlock(&nfsd_mutex); return -ENODEV; } - /* bump up the psudo refcount while traversing */ svc_get(nn->nfsd_serv); ret = svc_pool_stats_open(nn->nfsd_serv, file); mutex_unlock(&nfsd_mutex); @@ -1091,8 +1131,7 @@ int nfsd_pool_stats_release(struct inode *inode, struct file *file) struct net *net = inode->i_sb->s_fs_info; mutex_lock(&nfsd_mutex); - /* this function really, really should have been called svc_put() */ - nfsd_destroy(net); + nfsd_put(net); mutex_unlock(&nfsd_mutex); return ret; } diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index b51fe515f06f..caf6355b18fa 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -9,12 +9,10 @@ #include "xdr.h" #include "auth.h" -#define NFSDDBG_FACILITY NFSDDBG_XDR - /* * Mapping of S_IF* types to NFS file types */ -static u32 nfs_ftypes[] = { +static const u32 nfs_ftypes[] = { NFNON, NFCHR, NFCHR, NFBAD, NFDIR, NFBAD, NFBLK, NFBAD, NFREG, NFBAD, NFLNK, NFBAD, @@ -23,93 +21,168 @@ static u32 nfs_ftypes[] = { /* - * XDR functions for basic NFS types + * Basic NFSv2 data types (RFC 1094 Section 2.3) */ -static __be32 * -decode_fh(__be32 *p, struct svc_fh *fhp) + +/** + * svcxdr_encode_stat - Encode an NFSv2 status code + * @xdr: XDR stream + * @status: status value to encode + * + * Return values: + * %false: Send buffer space was exhausted + * %true: Success + */ +bool +svcxdr_encode_stat(struct xdr_stream *xdr, __be32 status) { + __be32 *p; + + p = xdr_reserve_space(xdr, sizeof(status)); + if (!p) + return false; + *p = status; + + return true; +} + +/** + * svcxdr_decode_fhandle - Decode an NFSv2 file handle + * @xdr: XDR stream positioned at an encoded NFSv2 FH + * @fhp: OUT: filled-in server file handle + * + * Return values: + * %false: The encoded file handle was not valid + * %true: @fhp has been initialized + */ +bool +svcxdr_decode_fhandle(struct xdr_stream *xdr, struct svc_fh *fhp) +{ + __be32 *p; + + p = xdr_inline_decode(xdr, NFS_FHSIZE); + if (!p) + return false; fh_init(fhp, NFS_FHSIZE); - memcpy(&fhp->fh_handle.fh_base, p, NFS_FHSIZE); + memcpy(&fhp->fh_handle.fh_raw, p, NFS_FHSIZE); fhp->fh_handle.fh_size = NFS_FHSIZE; - /* FIXME: Look up export pointer here and verify - * Sun Secure RPC if requested */ - return p + (NFS_FHSIZE >> 2); + return true; } -/* Helper function for NFSv2 ACL code */ -__be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp) +static bool +svcxdr_encode_fhandle(struct xdr_stream *xdr, const struct svc_fh *fhp) { - return decode_fh(p, fhp); + __be32 *p; + + p = xdr_reserve_space(xdr, NFS_FHSIZE); + if (!p) + return false; + memcpy(p, &fhp->fh_handle.fh_raw, NFS_FHSIZE); + + return true; } static __be32 * -encode_fh(__be32 *p, struct svc_fh *fhp) +encode_timeval(__be32 *p, const struct timespec64 *time) { - memcpy(p, &fhp->fh_handle.fh_base, NFS_FHSIZE); - return p + (NFS_FHSIZE>> 2); + *p++ = cpu_to_be32((u32)time->tv_sec); + if (time->tv_nsec) + *p++ = cpu_to_be32(time->tv_nsec / NSEC_PER_USEC); + else + *p++ = xdr_zero; + return p; } -/* - * Decode a file name and make sure that the path contains - * no slashes or null bytes. - */ -static __be32 * -decode_filename(__be32 *p, char **namp, unsigned int *lenp) +static bool +svcxdr_decode_filename(struct xdr_stream *xdr, char **name, unsigned int *len) { - char *name; - unsigned int i; - - if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXNAMLEN)) != NULL) { - for (i = 0, name = *namp; i < *lenp; i++, name++) { - if (*name == '\0' || *name == '/') - return NULL; - } - } + u32 size, i; + __be32 *p; + char *c; + + if (xdr_stream_decode_u32(xdr, &size) < 0) + return false; + if (size == 0 || size > NFS_MAXNAMLEN) + return false; + p = xdr_inline_decode(xdr, size); + if (!p) + return false; - return p; + *len = size; + *name = (char *)p; + for (i = 0, c = *name; i < size; i++, c++) + if (*c == '\0' || *c == '/') + return false; + + return true; } -static __be32 * -decode_sattr(__be32 *p, struct iattr *iap, struct user_namespace *userns) +static bool +svcxdr_decode_diropargs(struct xdr_stream *xdr, struct svc_fh *fhp, + char **name, unsigned int *len) { - u32 tmp, tmp1; + return svcxdr_decode_fhandle(xdr, fhp) && + svcxdr_decode_filename(xdr, name, len); +} + +static bool +svcxdr_decode_sattr(struct svc_rqst *rqstp, struct xdr_stream *xdr, + struct iattr *iap) +{ + u32 tmp1, tmp2; + __be32 *p; + + p = xdr_inline_decode(xdr, XDR_UNIT * 8); + if (!p) + return false; iap->ia_valid = 0; - /* Sun client bug compatibility check: some sun clients seem to - * put 0xffff in the mode field when they mean 0xffffffff. - * Quoting the 4.4BSD nfs server code: Nah nah nah nah na nah. + /* + * Some Sun clients put 0xffff in the mode field when they + * mean 0xffffffff. */ - if ((tmp = ntohl(*p++)) != (u32)-1 && tmp != 0xffff) { + tmp1 = be32_to_cpup(p++); + if (tmp1 != (u32)-1 && tmp1 != 0xffff) { iap->ia_valid |= ATTR_MODE; - iap->ia_mode = tmp; + iap->ia_mode = tmp1; } - if ((tmp = ntohl(*p++)) != (u32)-1) { - iap->ia_uid = make_kuid(userns, tmp); + + tmp1 = be32_to_cpup(p++); + if (tmp1 != (u32)-1) { + iap->ia_uid = make_kuid(nfsd_user_namespace(rqstp), tmp1); if (uid_valid(iap->ia_uid)) iap->ia_valid |= ATTR_UID; } - if ((tmp = ntohl(*p++)) != (u32)-1) { - iap->ia_gid = make_kgid(userns, tmp); + + tmp1 = be32_to_cpup(p++); + if (tmp1 != (u32)-1) { + iap->ia_gid = make_kgid(nfsd_user_namespace(rqstp), tmp1); if (gid_valid(iap->ia_gid)) iap->ia_valid |= ATTR_GID; } - if ((tmp = ntohl(*p++)) != (u32)-1) { + + tmp1 = be32_to_cpup(p++); + if (tmp1 != (u32)-1) { iap->ia_valid |= ATTR_SIZE; - iap->ia_size = tmp; + iap->ia_size = tmp1; } - tmp = ntohl(*p++); tmp1 = ntohl(*p++); - if (tmp != (u32)-1 && tmp1 != (u32)-1) { + + tmp1 = be32_to_cpup(p++); + tmp2 = be32_to_cpup(p++); + if (tmp1 != (u32)-1 && tmp2 != (u32)-1) { iap->ia_valid |= ATTR_ATIME | ATTR_ATIME_SET; - iap->ia_atime.tv_sec = tmp; - iap->ia_atime.tv_nsec = tmp1 * 1000; + iap->ia_atime.tv_sec = tmp1; + iap->ia_atime.tv_nsec = tmp2 * NSEC_PER_USEC; } - tmp = ntohl(*p++); tmp1 = ntohl(*p++); - if (tmp != (u32)-1 && tmp1 != (u32)-1) { + + tmp1 = be32_to_cpup(p++); + tmp2 = be32_to_cpup(p++); + if (tmp1 != (u32)-1 && tmp2 != (u32)-1) { iap->ia_valid |= ATTR_MTIME | ATTR_MTIME_SET; - iap->ia_mtime.tv_sec = tmp; - iap->ia_mtime.tv_nsec = tmp1 * 1000; + iap->ia_mtime.tv_sec = tmp1; + iap->ia_mtime.tv_nsec = tmp2 * NSEC_PER_USEC; /* * Passing the invalid value useconds=1000000 for mtime * is a Sun convention for "set both mtime and atime to @@ -119,452 +192,469 @@ decode_sattr(__be32 *p, struct iattr *iap, struct user_namespace *userns) * sattr in section 6.1 of "NFS Illustrated" by * Brent Callaghan, Addison-Wesley, ISBN 0-201-32750-5 */ - if (tmp1 == 1000000) + if (tmp2 == 1000000) iap->ia_valid &= ~(ATTR_ATIME_SET|ATTR_MTIME_SET); } - return p; + + return true; } -static __be32 * -encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, - struct kstat *stat) +/** + * svcxdr_encode_fattr - Encode NFSv2 file attributes + * @rqstp: Context of a completed RPC transaction + * @xdr: XDR stream + * @fhp: File handle to encode + * @stat: Attributes to encode + * + * Return values: + * %false: Send buffer space was exhausted + * %true: Success + */ +bool +svcxdr_encode_fattr(struct svc_rqst *rqstp, struct xdr_stream *xdr, + const struct svc_fh *fhp, const struct kstat *stat) { struct user_namespace *userns = nfsd_user_namespace(rqstp); - struct dentry *dentry = fhp->fh_dentry; - int type; + struct dentry *dentry = fhp->fh_dentry; + int type = stat->mode & S_IFMT; struct timespec64 time; - u32 f; + __be32 *p; + u32 fsid; - type = (stat->mode & S_IFMT); + p = xdr_reserve_space(xdr, XDR_UNIT * 17); + if (!p) + return false; - *p++ = htonl(nfs_ftypes[type >> 12]); - *p++ = htonl((u32) stat->mode); - *p++ = htonl((u32) stat->nlink); - *p++ = htonl((u32) from_kuid_munged(userns, stat->uid)); - *p++ = htonl((u32) from_kgid_munged(userns, stat->gid)); + *p++ = cpu_to_be32(nfs_ftypes[type >> 12]); + *p++ = cpu_to_be32((u32)stat->mode); + *p++ = cpu_to_be32((u32)stat->nlink); + *p++ = cpu_to_be32((u32)from_kuid_munged(userns, stat->uid)); + *p++ = cpu_to_be32((u32)from_kgid_munged(userns, stat->gid)); - if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) { - *p++ = htonl(NFS_MAXPATHLEN); - } else { - *p++ = htonl((u32) stat->size); - } - *p++ = htonl((u32) stat->blksize); + if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) + *p++ = cpu_to_be32(NFS_MAXPATHLEN); + else + *p++ = cpu_to_be32((u32) stat->size); + *p++ = cpu_to_be32((u32) stat->blksize); if (S_ISCHR(type) || S_ISBLK(type)) - *p++ = htonl(new_encode_dev(stat->rdev)); + *p++ = cpu_to_be32(new_encode_dev(stat->rdev)); else - *p++ = htonl(0xffffffff); - *p++ = htonl((u32) stat->blocks); + *p++ = cpu_to_be32(0xffffffff); + *p++ = cpu_to_be32((u32)stat->blocks); + switch (fsid_source(fhp)) { - default: - case FSIDSOURCE_DEV: - *p++ = htonl(new_encode_dev(stat->dev)); - break; case FSIDSOURCE_FSID: - *p++ = htonl((u32) fhp->fh_export->ex_fsid); + fsid = (u32)fhp->fh_export->ex_fsid; break; case FSIDSOURCE_UUID: - f = ((u32*)fhp->fh_export->ex_uuid)[0]; - f ^= ((u32*)fhp->fh_export->ex_uuid)[1]; - f ^= ((u32*)fhp->fh_export->ex_uuid)[2]; - f ^= ((u32*)fhp->fh_export->ex_uuid)[3]; - *p++ = htonl(f); + fsid = ((u32 *)fhp->fh_export->ex_uuid)[0]; + fsid ^= ((u32 *)fhp->fh_export->ex_uuid)[1]; + fsid ^= ((u32 *)fhp->fh_export->ex_uuid)[2]; + fsid ^= ((u32 *)fhp->fh_export->ex_uuid)[3]; + break; + default: + fsid = new_encode_dev(stat->dev); break; } - *p++ = htonl((u32) stat->ino); - *p++ = htonl((u32) stat->atime.tv_sec); - *p++ = htonl(stat->atime.tv_nsec ? stat->atime.tv_nsec / 1000 : 0); + *p++ = cpu_to_be32(fsid); + + *p++ = cpu_to_be32((u32)stat->ino); + p = encode_timeval(p, &stat->atime); time = stat->mtime; - lease_get_mtime(d_inode(dentry), &time); - *p++ = htonl((u32) time.tv_sec); - *p++ = htonl(time.tv_nsec ? time.tv_nsec / 1000 : 0); - *p++ = htonl((u32) stat->ctime.tv_sec); - *p++ = htonl(stat->ctime.tv_nsec ? stat->ctime.tv_nsec / 1000 : 0); + lease_get_mtime(d_inode(dentry), &time); + p = encode_timeval(p, &time); + encode_timeval(p, &stat->ctime); - return p; -} - -/* Helper function for NFSv2 ACL code */ -__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat) -{ - return encode_fattr(rqstp, p, fhp, stat); + return true; } /* * XDR decode functions */ -int -nfssvc_decode_void(struct svc_rqst *rqstp, __be32 *p) -{ - return xdr_argsize_check(rqstp, p); -} -int -nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd_fhandle *args = rqstp->rq_argp; - p = decode_fh(p, &args->fh); - if (!p) - return 0; - return xdr_argsize_check(rqstp, p); + return svcxdr_decode_fhandle(xdr, &args->fh); } -int -nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd_sattrargs *args = rqstp->rq_argp; - p = decode_fh(p, &args->fh); - if (!p) - return 0; - p = decode_sattr(p, &args->attrs, nfsd_user_namespace(rqstp)); - - return xdr_argsize_check(rqstp, p); + return svcxdr_decode_fhandle(xdr, &args->fh) && + svcxdr_decode_sattr(rqstp, xdr, &args->attrs); } -int -nfssvc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd_diropargs *args = rqstp->rq_argp; - if (!(p = decode_fh(p, &args->fh)) - || !(p = decode_filename(p, &args->name, &args->len))) - return 0; - - return xdr_argsize_check(rqstp, p); + return svcxdr_decode_diropargs(xdr, &args->fh, &args->name, &args->len); } -int -nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd_readargs *args = rqstp->rq_argp; - unsigned int len; - int v; - p = decode_fh(p, &args->fh); - if (!p) - return 0; - - args->offset = ntohl(*p++); - len = args->count = ntohl(*p++); - p++; /* totalcount - unused */ - - len = min_t(unsigned int, len, NFSSVC_MAXBLKSIZE_V2); - - /* set up somewhere to store response. - * We take pages, put them on reslist and include in iovec - */ - v=0; - while (len > 0) { - struct page *p = *(rqstp->rq_next_page++); - - rqstp->rq_vec[v].iov_base = page_address(p); - rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE); - len -= rqstp->rq_vec[v].iov_len; - v++; - } - args->vlen = v; - return xdr_argsize_check(rqstp, p); + u32 totalcount; + + if (!svcxdr_decode_fhandle(xdr, &args->fh)) + return false; + if (xdr_stream_decode_u32(xdr, &args->offset) < 0) + return false; + if (xdr_stream_decode_u32(xdr, &args->count) < 0) + return false; + /* totalcount is ignored */ + if (xdr_stream_decode_u32(xdr, &totalcount) < 0) + return false; + + return true; } -int -nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd_writeargs *args = rqstp->rq_argp; - unsigned int len, hdr, dlen; - struct kvec *head = rqstp->rq_arg.head; - - p = decode_fh(p, &args->fh); - if (!p) - return 0; - - p++; /* beginoffset */ - args->offset = ntohl(*p++); /* offset */ - p++; /* totalcount */ - len = args->len = ntohl(*p++); - /* - * The protocol specifies a maximum of 8192 bytes. - */ - if (len > NFSSVC_MAXBLKSIZE_V2) - return 0; - - /* - * Check to make sure that we got the right number of - * bytes. - */ - hdr = (void*)p - head->iov_base; - if (hdr > head->iov_len) - return 0; - dlen = head->iov_len + rqstp->rq_arg.page_len - hdr; - - /* - * Round the length of the data which was specified up to - * the next multiple of XDR units and then compare that - * against the length which was actually received. - * Note that when RPCSEC/GSS (for example) is used, the - * data buffer can be padded so dlen might be larger - * than required. It must never be smaller. - */ - if (dlen < XDR_QUADLEN(len)*4) - return 0; - - args->first.iov_base = (void *)p; - args->first.iov_len = head->iov_len - hdr; - return 1; + u32 beginoffset, totalcount; + + if (!svcxdr_decode_fhandle(xdr, &args->fh)) + return false; + /* beginoffset is ignored */ + if (xdr_stream_decode_u32(xdr, &beginoffset) < 0) + return false; + if (xdr_stream_decode_u32(xdr, &args->offset) < 0) + return false; + /* totalcount is ignored */ + if (xdr_stream_decode_u32(xdr, &totalcount) < 0) + return false; + + /* opaque data */ + if (xdr_stream_decode_u32(xdr, &args->len) < 0) + return false; + if (args->len > NFSSVC_MAXBLKSIZE_V2) + return false; + + return xdr_stream_subsegment(xdr, &args->payload, args->len); } -int -nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd_createargs *args = rqstp->rq_argp; - if ( !(p = decode_fh(p, &args->fh)) - || !(p = decode_filename(p, &args->name, &args->len))) - return 0; - p = decode_sattr(p, &args->attrs, nfsd_user_namespace(rqstp)); - - return xdr_argsize_check(rqstp, p); + return svcxdr_decode_diropargs(xdr, &args->fh, + &args->name, &args->len) && + svcxdr_decode_sattr(rqstp, xdr, &args->attrs); } -int -nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd_renameargs *args = rqstp->rq_argp; - if (!(p = decode_fh(p, &args->ffh)) - || !(p = decode_filename(p, &args->fname, &args->flen)) - || !(p = decode_fh(p, &args->tfh)) - || !(p = decode_filename(p, &args->tname, &args->tlen))) - return 0; - - return xdr_argsize_check(rqstp, p); + return svcxdr_decode_diropargs(xdr, &args->ffh, + &args->fname, &args->flen) && + svcxdr_decode_diropargs(xdr, &args->tfh, + &args->tname, &args->tlen); } -int -nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p) -{ - struct nfsd_readlinkargs *args = rqstp->rq_argp; - - p = decode_fh(p, &args->fh); - if (!p) - return 0; - args->buffer = page_address(*(rqstp->rq_next_page++)); - - return xdr_argsize_check(rqstp, p); -} - -int -nfssvc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd_linkargs *args = rqstp->rq_argp; - if (!(p = decode_fh(p, &args->ffh)) - || !(p = decode_fh(p, &args->tfh)) - || !(p = decode_filename(p, &args->tname, &args->tlen))) - return 0; - - return xdr_argsize_check(rqstp, p); + return svcxdr_decode_fhandle(xdr, &args->ffh) && + svcxdr_decode_diropargs(xdr, &args->tfh, + &args->tname, &args->tlen); } -int -nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd_symlinkargs *args = rqstp->rq_argp; - char *base = (char *)p; - size_t xdrlen; - - if ( !(p = decode_fh(p, &args->ffh)) - || !(p = decode_filename(p, &args->fname, &args->flen))) - return 0; + struct kvec *head = rqstp->rq_arg.head; - args->tlen = ntohl(*p++); + if (!svcxdr_decode_diropargs(xdr, &args->ffh, &args->fname, &args->flen)) + return false; + if (xdr_stream_decode_u32(xdr, &args->tlen) < 0) + return false; if (args->tlen == 0) - return 0; + return false; - args->first.iov_base = p; - args->first.iov_len = rqstp->rq_arg.head[0].iov_len; - args->first.iov_len -= (char *)p - base; - - /* This request is never larger than a page. Therefore, - * transport will deliver either: - * 1. pathname in the pagelist -> sattr is in the tail. - * 2. everything in the head buffer -> sattr is in the head. - */ - if (rqstp->rq_arg.page_len) { - if (args->tlen != rqstp->rq_arg.page_len) - return 0; - p = rqstp->rq_arg.tail[0].iov_base; - } else { - xdrlen = XDR_QUADLEN(args->tlen); - if (xdrlen > args->first.iov_len - (8 * sizeof(__be32))) - return 0; - p += xdrlen; - } - decode_sattr(p, &args->attrs, nfsd_user_namespace(rqstp)); - - return 1; + args->first.iov_len = head->iov_len - xdr_stream_pos(xdr); + args->first.iov_base = xdr_inline_decode(xdr, args->tlen); + if (!args->first.iov_base) + return false; + return svcxdr_decode_sattr(rqstp, xdr, &args->attrs); } -int -nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd_readdirargs *args = rqstp->rq_argp; - p = decode_fh(p, &args->fh); - if (!p) - return 0; - args->cookie = ntohl(*p++); - args->count = ntohl(*p++); - args->count = min_t(u32, args->count, PAGE_SIZE); - args->buffer = page_address(*(rqstp->rq_next_page++)); + if (!svcxdr_decode_fhandle(xdr, &args->fh)) + return false; + if (xdr_stream_decode_u32(xdr, &args->cookie) < 0) + return false; + if (xdr_stream_decode_u32(xdr, &args->count) < 0) + return false; - return xdr_argsize_check(rqstp, p); + return true; } /* * XDR encode functions */ -int -nfssvc_encode_void(struct svc_rqst *rqstp, __be32 *p) + +bool +nfssvc_encode_statres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { - return xdr_ressize_check(rqstp, p); + struct nfsd_stat *resp = rqstp->rq_resp; + + return svcxdr_encode_stat(xdr, resp->status); } -int -nfssvc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_encode_attrstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd_attrstat *resp = rqstp->rq_resp; - p = encode_fattr(rqstp, p, &resp->fh, &resp->stat); - return xdr_ressize_check(rqstp, p); + if (!svcxdr_encode_stat(xdr, resp->status)) + return false; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) + return false; + break; + } + + return true; } -int -nfssvc_encode_diropres(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_encode_diropres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd_diropres *resp = rqstp->rq_resp; - p = encode_fh(p, &resp->fh); - p = encode_fattr(rqstp, p, &resp->fh, &resp->stat); - return xdr_ressize_check(rqstp, p); + if (!svcxdr_encode_stat(xdr, resp->status)) + return false; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_fhandle(xdr, &resp->fh)) + return false; + if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) + return false; + break; + } + + return true; } -int -nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd_readlinkres *resp = rqstp->rq_resp; - - *p++ = htonl(resp->len); - xdr_ressize_check(rqstp, p); - rqstp->rq_res.page_len = resp->len; - if (resp->len & 3) { - /* need to pad the tail */ - rqstp->rq_res.tail[0].iov_base = p; - *p = 0; - rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3); + struct kvec *head = rqstp->rq_res.head; + + if (!svcxdr_encode_stat(xdr, resp->status)) + return false; + switch (resp->status) { + case nfs_ok: + if (xdr_stream_encode_u32(xdr, resp->len) < 0) + return false; + xdr_write_pages(xdr, &resp->page, 0, resp->len); + if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0) + return false; + break; } - return 1; + + return true; } -int -nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd_readres *resp = rqstp->rq_resp; - - p = encode_fattr(rqstp, p, &resp->fh, &resp->stat); - *p++ = htonl(resp->count); - xdr_ressize_check(rqstp, p); - - /* now update rqstp->rq_res to reflect data as well */ - rqstp->rq_res.page_len = resp->count; - if (resp->count & 3) { - /* need to pad the tail */ - rqstp->rq_res.tail[0].iov_base = p; - *p = 0; - rqstp->rq_res.tail[0].iov_len = 4 - (resp->count&3); + struct kvec *head = rqstp->rq_res.head; + + if (!svcxdr_encode_stat(xdr, resp->status)) + return false; + switch (resp->status) { + case nfs_ok: + if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat)) + return false; + if (xdr_stream_encode_u32(xdr, resp->count) < 0) + return false; + xdr_write_pages(xdr, resp->pages, rqstp->rq_res.page_base, + resp->count); + if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0) + return false; + break; } - return 1; + + return true; } -int -nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd_readdirres *resp = rqstp->rq_resp; + struct xdr_buf *dirlist = &resp->dirlist; + + if (!svcxdr_encode_stat(xdr, resp->status)) + return false; + switch (resp->status) { + case nfs_ok: + xdr_write_pages(xdr, dirlist->pages, 0, dirlist->len); + /* no more entries */ + if (xdr_stream_encode_item_absent(xdr) < 0) + return false; + if (xdr_stream_encode_bool(xdr, resp->common.err == nfserr_eof) < 0) + return false; + break; + } - xdr_ressize_check(rqstp, p); - p = resp->buffer; - *p++ = 0; /* no more entries */ - *p++ = htonl((resp->common.err == nfserr_eof)); - rqstp->rq_res.page_len = (((unsigned long)p-1) & ~PAGE_MASK)+1; - - return 1; + return true; } -int -nfssvc_encode_statfsres(struct svc_rqst *rqstp, __be32 *p) +bool +nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr) { struct nfsd_statfsres *resp = rqstp->rq_resp; struct kstatfs *stat = &resp->stats; + __be32 *p; + + if (!svcxdr_encode_stat(xdr, resp->status)) + return false; + switch (resp->status) { + case nfs_ok: + p = xdr_reserve_space(xdr, XDR_UNIT * 5); + if (!p) + return false; + *p++ = cpu_to_be32(NFSSVC_MAXBLKSIZE_V2); + *p++ = cpu_to_be32(stat->f_bsize); + *p++ = cpu_to_be32(stat->f_blocks); + *p++ = cpu_to_be32(stat->f_bfree); + *p = cpu_to_be32(stat->f_bavail); + break; + } - *p++ = htonl(NFSSVC_MAXBLKSIZE_V2); /* max transfer size */ - *p++ = htonl(stat->f_bsize); - *p++ = htonl(stat->f_blocks); - *p++ = htonl(stat->f_bfree); - *p++ = htonl(stat->f_bavail); - return xdr_ressize_check(rqstp, p); + return true; } -int -nfssvc_encode_entry(void *ccdv, const char *name, - int namlen, loff_t offset, u64 ino, unsigned int d_type) +/** + * nfssvc_encode_nfscookie - Encode a directory offset cookie + * @resp: readdir result context + * @offset: offset cookie to encode + * + * The buffer space for the offset cookie has already been reserved + * by svcxdr_encode_entry_common(). + */ +void nfssvc_encode_nfscookie(struct nfsd_readdirres *resp, u32 offset) { - struct readdir_cd *ccd = ccdv; - struct nfsd_readdirres *cd = container_of(ccd, struct nfsd_readdirres, common); - __be32 *p = cd->buffer; - int buflen, slen; + __be32 cookie = cpu_to_be32(offset); - /* - dprintk("nfsd: entry(%.*s off %ld ino %ld)\n", - namlen, name, offset, ino); - */ + if (!resp->cookie_offset) + return; - if (offset > ~((u32) 0)) { - cd->common.err = nfserr_fbig; - return -EINVAL; - } - if (cd->offset) - *cd->offset = htonl(offset); + write_bytes_to_xdr_buf(&resp->dirlist, resp->cookie_offset, &cookie, + sizeof(cookie)); + resp->cookie_offset = 0; +} - /* truncate filename */ - namlen = min(namlen, NFS2_MAXNAMLEN); - slen = XDR_QUADLEN(namlen); +static bool +svcxdr_encode_entry_common(struct nfsd_readdirres *resp, const char *name, + int namlen, loff_t offset, u64 ino) +{ + struct xdr_buf *dirlist = &resp->dirlist; + struct xdr_stream *xdr = &resp->xdr; + + if (xdr_stream_encode_item_present(xdr) < 0) + return false; + /* fileid */ + if (xdr_stream_encode_u32(xdr, (u32)ino) < 0) + return false; + /* name */ + if (xdr_stream_encode_opaque(xdr, name, min(namlen, NFS2_MAXNAMLEN)) < 0) + return false; + /* cookie */ + resp->cookie_offset = dirlist->len; + if (xdr_stream_encode_u32(xdr, ~0U) < 0) + return false; + + return true; +} - if ((buflen = cd->buflen - slen - 4) < 0) { - cd->common.err = nfserr_toosmall; - return -EINVAL; - } - if (ino > ~((u32) 0)) { - cd->common.err = nfserr_fbig; - return -EINVAL; - } - *p++ = xdr_one; /* mark entry present */ - *p++ = htonl((u32) ino); /* file id */ - p = xdr_encode_array(p, name, namlen);/* name length & name */ - cd->offset = p; /* remember pointer */ - *p++ = htonl(~0U); /* offset of next entry */ - - cd->buflen = buflen; - cd->buffer = p; - cd->common.err = nfs_ok; +/** + * nfssvc_encode_entry - encode one NFSv2 READDIR entry + * @data: directory context + * @name: name of the object to be encoded + * @namlen: length of that name, in bytes + * @offset: the offset of the previous entry + * @ino: the fileid of this entry + * @d_type: unused + * + * Return values: + * %0: Entry was successfully encoded. + * %-EINVAL: An encoding problem occured, secondary status code in resp->common.err + * + * On exit, the following fields are updated: + * - resp->xdr + * - resp->common.err + * - resp->cookie_offset + */ +int nfssvc_encode_entry(void *data, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type) +{ + struct readdir_cd *ccd = data; + struct nfsd_readdirres *resp = container_of(ccd, + struct nfsd_readdirres, + common); + unsigned int starting_length = resp->dirlist.len; + + /* The offset cookie for the previous entry */ + nfssvc_encode_nfscookie(resp, offset); + + if (!svcxdr_encode_entry_common(resp, name, namlen, offset, ino)) + goto out_toosmall; + + xdr_commit_encode(&resp->xdr); + resp->common.err = nfs_ok; return 0; + +out_toosmall: + resp->cookie_offset = 0; + resp->common.err = nfserr_toosmall; + resp->dirlist.len = starting_length; + return -EINVAL; } /* * XDR release functions */ -void -nfssvc_release_fhandle(struct svc_rqst *rqstp) +void nfssvc_release_attrstat(struct svc_rqst *rqstp) +{ + struct nfsd_attrstat *resp = rqstp->rq_resp; + + fh_put(&resp->fh); +} + +void nfssvc_release_diropres(struct svc_rqst *rqstp) { - struct nfsd_fhandle *resp = rqstp->rq_resp; + struct nfsd_diropres *resp = rqstp->rq_resp; + + fh_put(&resp->fh); +} + +void nfssvc_release_readres(struct svc_rqst *rqstp) +{ + struct nfsd_readres *resp = rqstp->rq_resp; fh_put(&resp->fh); } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 68d3f30ee760..e2daef3cc003 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -57,20 +57,13 @@ typedef struct { } stateid_t; typedef struct { - stateid_t stid; + stateid_t cs_stid; #define NFS4_COPY_STID 1 #define NFS4_COPYNOTIFY_STID 2 - unsigned char sc_type; - refcount_t sc_count; + unsigned char cs_type; + refcount_t cs_count; } copy_stateid_t; -#define STATEID_FMT "(%08x/%08x/%08x/%08x)" -#define STATEID_VAL(s) \ - (s)->si_opaque.so_clid.cl_boot, \ - (s)->si_opaque.so_clid.cl_id, \ - (s)->si_opaque.so_id, \ - (s)->si_generation - struct nfsd4_callback { struct nfs4_client *cb_clp; struct rpc_message cb_msg; @@ -156,6 +149,7 @@ struct nfs4_delegation { /* For recall: */ int dl_retries; struct nfsd4_callback dl_recall; + bool dl_recalled; }; #define cb_to_delegation(cb) \ @@ -181,7 +175,7 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s) /* Maximum number of slots per session. 160 is useful for long haul TCP */ #define NFSD_MAX_SLOTS_PER_SESSION 160 /* Maximum number of operations per session compound */ -#define NFSD_MAX_OPS_PER_COMPOUND 16 +#define NFSD_MAX_OPS_PER_COMPOUND 50 /* Maximum session per slot cache size */ #define NFSD_SLOT_CACHE_SIZE 2048 /* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */ @@ -290,6 +284,28 @@ struct nfsd4_sessionid { #define HEXDIR_LEN 33 /* hex version of 16 byte md5 of cl_name plus '\0' */ /* + * State Meaning Where set + * -------------------------------------------------------------------------- + * | NFSD4_ACTIVE | Confirmed, active | Default | + * |------------------- ----------------------------------------------------| + * | NFSD4_COURTESY | Courtesy state. | nfs4_get_client_reaplist | + * | | Lease/lock/share | | + * | | reservation conflict | | + * | | can cause Courtesy | | + * | | client to be expired | | + * |------------------------------------------------------------------------| + * | NFSD4_EXPIRABLE | Courtesy client to be| nfs4_laundromat | + * | | expired by Laundromat| try_to_expire_client | + * | | due to conflict | | + * |------------------------------------------------------------------------| + */ +enum { + NFSD4_ACTIVE = 0, + NFSD4_COURTESY, + NFSD4_EXPIRABLE, +}; + +/* * struct nfs4_client - one per client. Clientids live here. * * The initial object created by an NFS client using SETCLIENTID (for NFSv4.0) @@ -378,6 +394,10 @@ struct nfs4_client { /* debugging info directory under nfsd/clients/ : */ struct dentry *cl_nfsd_dentry; + /* 'info' file within that directory. Ref is not counted, + * but will remain valid iff cl_nfsd_dentry != NULL + */ + struct dentry *cl_nfsd_info_dentry; /* for nfs41 callbacks */ /* We currently support a single back channel with a single slot */ @@ -388,6 +408,9 @@ struct nfs4_client { struct list_head async_copies; /* list of async copies */ spinlock_t async_lock; /* lock for async copies */ atomic_t cl_cb_inflight; /* Outstanding callbacks */ + + unsigned int cl_state; + atomic_t cl_delegs_in_recall; }; /* struct nfs4_client_reset @@ -519,6 +542,8 @@ struct nfs4_clnt_odstate { */ struct nfs4_file { refcount_t fi_ref; + struct inode * fi_inode; + bool fi_aliased; spinlock_t fi_lock; struct hlist_node fi_hash; /* hash on fi_fhandle */ struct list_head fi_stateids; @@ -569,6 +594,10 @@ struct nfs4_ol_stateid { struct list_head st_locks; struct nfs4_stateowner *st_stateowner; struct nfs4_clnt_odstate *st_clnt_odstate; +/* + * These bitmasks use 3 separate bits for READ, ALLOW, and BOTH; see the + * comment above bmap_to_share_mode() for explanation: + */ unsigned char st_access_bmap; unsigned char st_deny_bmap; struct nfs4_ol_stateid *st_openstp; @@ -630,6 +659,7 @@ struct nfsd4_blocked_lock { struct file_lock nbl_lock; struct knfsd_fh nbl_fh; struct nfsd4_callback nbl_cb; + struct kref nbl_kref; }; struct nfsd4_compound_state; @@ -656,26 +686,22 @@ void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *) extern void nfs4_release_reclaim(struct nfsd_net *); extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(struct xdr_netobj name, struct nfsd_net *nn); -extern __be32 nfs4_check_open_reclaim(clientid_t *clid, - struct nfsd4_compound_state *cstate, struct nfsd_net *nn); +extern __be32 nfs4_check_open_reclaim(struct nfs4_client *); extern void nfsd4_probe_callback(struct nfs4_client *clp); extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, const struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op); -extern void nfsd4_run_cb(struct nfsd4_callback *cb); +extern bool nfsd4_run_cb(struct nfsd4_callback *cb); extern int nfsd4_create_callback_queue(void); extern void nfsd4_destroy_callback_queue(void); extern void nfsd4_shutdown_callback(struct nfs4_client *); extern void nfsd4_shutdown_copy(struct nfs4_client *clp); -extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp); extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name, struct xdr_netobj princhash, struct nfsd_net *nn); extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn); -struct nfs4_file *find_file(struct knfsd_fh *fh); void put_nfs4_file(struct nfs4_file *fi); -extern void nfs4_put_copy(struct nfsd4_copy *copy); extern struct nfsd4_copy * find_async_copy(struct nfs4_client *clp, stateid_t *staetid); extern void nfs4_put_cpntf_state(struct nfsd_net *nn, @@ -700,31 +726,9 @@ extern void nfsd4_client_record_remove(struct nfs4_client *clp); extern int nfsd4_client_record_check(struct nfs4_client *clp); extern void nfsd4_record_grace_done(struct nfsd_net *nn); -/* nfs fault injection functions */ -#ifdef CONFIG_NFSD_FAULT_INJECTION -void nfsd_fault_inject_init(void); -void nfsd_fault_inject_cleanup(void); - -u64 nfsd_inject_print_clients(void); -u64 nfsd_inject_forget_client(struct sockaddr_storage *, size_t); -u64 nfsd_inject_forget_clients(u64); - -u64 nfsd_inject_print_locks(void); -u64 nfsd_inject_forget_client_locks(struct sockaddr_storage *, size_t); -u64 nfsd_inject_forget_locks(u64); - -u64 nfsd_inject_print_openowners(void); -u64 nfsd_inject_forget_client_openowners(struct sockaddr_storage *, size_t); -u64 nfsd_inject_forget_openowners(u64); - -u64 nfsd_inject_print_delegations(void); -u64 nfsd_inject_forget_client_delegations(struct sockaddr_storage *, size_t); -u64 nfsd_inject_forget_delegations(u64); -u64 nfsd_inject_recall_client_delegations(struct sockaddr_storage *, size_t); -u64 nfsd_inject_recall_delegations(u64); -#else /* CONFIG_NFSD_FAULT_INJECTION */ -static inline void nfsd_fault_inject_init(void) {} -static inline void nfsd_fault_inject_cleanup(void) {} -#endif /* CONFIG_NFSD_FAULT_INJECTION */ - +static inline bool try_to_expire_client(struct nfs4_client *clp) +{ + cmpxchg(&clp->cl_state, NFSD4_COURTESY, NFSD4_EXPIRABLE); + return clp->cl_state == NFSD4_EXPIRABLE; +} #endif /* NFSD4_STATE_H */ diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c index b1bc582b0493..777e24e5da33 100644 --- a/fs/nfsd/stats.c +++ b/fs/nfsd/stats.c @@ -7,16 +7,14 @@ * Format: * rc <hits> <misses> <nocache> * Statistsics for the reply cache - * fh <stale> <total-lookups> <anonlookups> <dir-not-in-dcache> <nondir-not-in-dcache> + * fh <stale> <deprecated filehandle cache stats> * statistics for filehandle lookup * io <bytes-read> <bytes-written> * statistics for IO throughput - * th <threads> <fullcnt> <10%-20%> <20%-30%> ... <90%-100%> <100%> - * time (seconds) when nfsd thread usage above thresholds - * and number of times that all threads were in use - * ra cache-size <10% <20% <30% ... <100% not-found - * number of times that read-ahead entry was found that deep in - * the cache. + * th <threads> <deprecated thread usage histogram stats> + * number of threads + * ra <deprecated ra-cache stats> + * * plus generic RPC stats (see net/sunrpc/stats.c) * * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> @@ -34,35 +32,28 @@ struct svc_stat nfsd_svcstats = { .program = &nfsd_program, }; -static int nfsd_proc_show(struct seq_file *seq, void *v) +static int nfsd_show(struct seq_file *seq, void *v) { int i; - seq_printf(seq, "rc %u %u %u\nfh %u %u %u %u %u\nio %u %u\n", - nfsdstats.rchits, - nfsdstats.rcmisses, - nfsdstats.rcnocache, - nfsdstats.fh_stale, - nfsdstats.fh_lookup, - nfsdstats.fh_anon, - nfsdstats.fh_nocache_dir, - nfsdstats.fh_nocache_nondir, - nfsdstats.io_read, - nfsdstats.io_write); + seq_printf(seq, "rc %lld %lld %lld\nfh %lld 0 0 0 0\nio %lld %lld\n", + percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_HITS]), + percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_MISSES]), + percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE]), + percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_FH_STALE]), + percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_IO_READ]), + percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_IO_WRITE])); + /* thread usage: */ - seq_printf(seq, "th %u %u", nfsdstats.th_cnt, nfsdstats.th_fullcnt); - for (i=0; i<10; i++) { - unsigned int jifs = nfsdstats.th_usage[i]; - unsigned int sec = jifs / HZ, msec = (jifs % HZ)*1000/HZ; - seq_printf(seq, " %u.%03u", sec, msec); - } + seq_printf(seq, "th %u 0", atomic_read(&nfsdstats.th_cnt)); + + /* deprecated thread usage histogram stats */ + for (i = 0; i < 10; i++) + seq_puts(seq, " 0.000"); + + /* deprecated ra-cache stats */ + seq_puts(seq, "\nra 0 0 0 0 0 0 0 0 0 0 0 0\n"); - /* newline and ra-cache */ - seq_printf(seq, "\nra %u", nfsdstats.ra_size); - for (i=0; i<11; i++) - seq_printf(seq, " %u", nfsdstats.ra_depth[i]); - seq_putc(seq, '\n'); - /* show my rpc info */ svc_seq_show(seq, &nfsd_svcstats); @@ -70,8 +61,10 @@ static int nfsd_proc_show(struct seq_file *seq, void *v) /* Show count for individual nfsv4 operations */ /* Writing operation numbers 0 1 2 also for maintaining uniformity */ seq_printf(seq,"proc4ops %u", LAST_NFS4_OP + 1); - for (i = 0; i <= LAST_NFS4_OP; i++) - seq_printf(seq, " %u", nfsdstats.nfs4_opcount[i]); + for (i = 0; i <= LAST_NFS4_OP; i++) { + seq_printf(seq, " %lld", + percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_NFS4_OP(i)])); + } seq_putc(seq, '\n'); #endif @@ -79,26 +72,65 @@ static int nfsd_proc_show(struct seq_file *seq, void *v) return 0; } -static int nfsd_proc_open(struct inode *inode, struct file *file) +DEFINE_PROC_SHOW_ATTRIBUTE(nfsd); + +int nfsd_percpu_counters_init(struct percpu_counter counters[], int num) { - return single_open(file, nfsd_proc_show, NULL); + int i, err = 0; + + for (i = 0; !err && i < num; i++) + err = percpu_counter_init(&counters[i], 0, GFP_KERNEL); + + if (!err) + return 0; + + for (; i > 0; i--) + percpu_counter_destroy(&counters[i-1]); + + return err; } -static const struct proc_ops nfsd_proc_ops = { - .proc_open = nfsd_proc_open, - .proc_read = seq_read, - .proc_lseek = seq_lseek, - .proc_release = single_release, -}; +void nfsd_percpu_counters_reset(struct percpu_counter counters[], int num) +{ + int i; + + for (i = 0; i < num; i++) + percpu_counter_set(&counters[i], 0); +} + +void nfsd_percpu_counters_destroy(struct percpu_counter counters[], int num) +{ + int i; + + for (i = 0; i < num; i++) + percpu_counter_destroy(&counters[i]); +} + +static int nfsd_stat_counters_init(void) +{ + return nfsd_percpu_counters_init(nfsdstats.counter, NFSD_STATS_COUNTERS_NUM); +} + +static void nfsd_stat_counters_destroy(void) +{ + nfsd_percpu_counters_destroy(nfsdstats.counter, NFSD_STATS_COUNTERS_NUM); +} -void -nfsd_stat_init(void) +int nfsd_stat_init(void) { + int err; + + err = nfsd_stat_counters_init(); + if (err) + return err; + svc_proc_register(&init_net, &nfsd_svcstats, &nfsd_proc_ops); + + return 0; } -void -nfsd_stat_shutdown(void) +void nfsd_stat_shutdown(void) { + nfsd_stat_counters_destroy(); svc_proc_unregister(&init_net, "nfsd"); } diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h index b23fdac69820..9b43dc3d9991 100644 --- a/fs/nfsd/stats.h +++ b/fs/nfsd/stats.h @@ -8,37 +8,89 @@ #define _NFSD_STATS_H #include <uapi/linux/nfsd/stats.h> +#include <linux/percpu_counter.h> -struct nfsd_stats { - unsigned int rchits; /* repcache hits */ - unsigned int rcmisses; /* repcache hits */ - unsigned int rcnocache; /* uncached reqs */ - unsigned int fh_stale; /* FH stale error */ - unsigned int fh_lookup; /* dentry cached */ - unsigned int fh_anon; /* anon file dentry returned */ - unsigned int fh_nocache_dir; /* filehandle not found in dcache */ - unsigned int fh_nocache_nondir; /* filehandle not found in dcache */ - unsigned int io_read; /* bytes returned to read requests */ - unsigned int io_write; /* bytes passed in write requests */ - unsigned int th_cnt; /* number of available threads */ - unsigned int th_usage[10]; /* number of ticks during which n perdeciles - * of available threads were in use */ - unsigned int th_fullcnt; /* number of times last free thread was used */ - unsigned int ra_size; /* size of ra cache */ - unsigned int ra_depth[11]; /* number of times ra entry was found that deep - * in the cache (10percentiles). [10] = not found */ +enum { + NFSD_STATS_RC_HITS, /* repcache hits */ + NFSD_STATS_RC_MISSES, /* repcache misses */ + NFSD_STATS_RC_NOCACHE, /* uncached reqs */ + NFSD_STATS_FH_STALE, /* FH stale error */ + NFSD_STATS_IO_READ, /* bytes returned to read requests */ + NFSD_STATS_IO_WRITE, /* bytes passed in write requests */ #ifdef CONFIG_NFSD_V4 - unsigned int nfs4_opcount[LAST_NFS4_OP + 1]; /* count of individual nfsv4 operations */ + NFSD_STATS_FIRST_NFS4_OP, /* count of individual nfsv4 operations */ + NFSD_STATS_LAST_NFS4_OP = NFSD_STATS_FIRST_NFS4_OP + LAST_NFS4_OP, +#define NFSD_STATS_NFS4_OP(op) (NFSD_STATS_FIRST_NFS4_OP + (op)) #endif - + NFSD_STATS_COUNTERS_NUM }; +struct nfsd_stats { + struct percpu_counter counter[NFSD_STATS_COUNTERS_NUM]; + + atomic_t th_cnt; /* number of available threads */ +}; extern struct nfsd_stats nfsdstats; + extern struct svc_stat nfsd_svcstats; -void nfsd_stat_init(void); -void nfsd_stat_shutdown(void); +int nfsd_percpu_counters_init(struct percpu_counter counters[], int num); +void nfsd_percpu_counters_reset(struct percpu_counter counters[], int num); +void nfsd_percpu_counters_destroy(struct percpu_counter counters[], int num); +int nfsd_stat_init(void); +void nfsd_stat_shutdown(void); + +static inline void nfsd_stats_rc_hits_inc(void) +{ + percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_HITS]); +} + +static inline void nfsd_stats_rc_misses_inc(void) +{ + percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_MISSES]); +} + +static inline void nfsd_stats_rc_nocache_inc(void) +{ + percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE]); +} + +static inline void nfsd_stats_fh_stale_inc(struct svc_export *exp) +{ + percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_FH_STALE]); + if (exp) + percpu_counter_inc(&exp->ex_stats.counter[EXP_STATS_FH_STALE]); +} + +static inline void nfsd_stats_io_read_add(struct svc_export *exp, s64 amount) +{ + percpu_counter_add(&nfsdstats.counter[NFSD_STATS_IO_READ], amount); + if (exp) + percpu_counter_add(&exp->ex_stats.counter[EXP_STATS_IO_READ], amount); +} + +static inline void nfsd_stats_io_write_add(struct svc_export *exp, s64 amount) +{ + percpu_counter_add(&nfsdstats.counter[NFSD_STATS_IO_WRITE], amount); + if (exp) + percpu_counter_add(&exp->ex_stats.counter[EXP_STATS_IO_WRITE], amount); +} + +static inline void nfsd_stats_payload_misses_inc(struct nfsd_net *nn) +{ + percpu_counter_inc(&nn->counter[NFSD_NET_PAYLOAD_MISSES]); +} + +static inline void nfsd_stats_drc_mem_usage_add(struct nfsd_net *nn, s64 amount) +{ + percpu_counter_add(&nn->counter[NFSD_NET_DRC_MEM_USAGE], amount); +} + +static inline void nfsd_stats_drc_mem_usage_sub(struct nfsd_net *nn, s64 amount) +{ + percpu_counter_sub(&nn->counter[NFSD_NET_DRC_MEM_USAGE], amount); +} #endif /* _NFSD_STATS_H */ diff --git a/fs/nfsd/trace.c b/fs/nfsd/trace.c index 90967466a1e5..f008b95ceec2 100644 --- a/fs/nfsd/trace.c +++ b/fs/nfsd/trace.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #define CREATE_TRACE_POINTS #include "trace.h" diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 06dd0d337049..06a96e955bd0 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -9,22 +9,101 @@ #define _NFSD_TRACE_H #include <linux/tracepoint.h> + +#include "export.h" #include "nfsfh.h" +#define NFSD_TRACE_PROC_RES_FIELDS \ + __field(unsigned int, netns_ino) \ + __field(u32, xid) \ + __field(unsigned long, status) \ + __array(unsigned char, server, sizeof(struct sockaddr_in6)) \ + __array(unsigned char, client, sizeof(struct sockaddr_in6)) + +#define NFSD_TRACE_PROC_RES_ASSIGNMENTS(error) \ + do { \ + __entry->netns_ino = SVC_NET(rqstp)->ns.inum; \ + __entry->xid = be32_to_cpu(rqstp->rq_xid); \ + __entry->status = be32_to_cpu(error); \ + memcpy(__entry->server, &rqstp->rq_xprt->xpt_local, \ + rqstp->rq_xprt->xpt_locallen); \ + memcpy(__entry->client, &rqstp->rq_xprt->xpt_remote, \ + rqstp->rq_xprt->xpt_remotelen); \ + } while (0); + +DECLARE_EVENT_CLASS(nfsd_xdr_err_class, + TP_PROTO( + const struct svc_rqst *rqstp + ), + TP_ARGS(rqstp), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __field(u32, xid) + __field(u32, vers) + __field(u32, proc) + __sockaddr(server, rqstp->rq_xprt->xpt_locallen) + __sockaddr(client, rqstp->rq_xprt->xpt_remotelen) + ), + TP_fast_assign( + const struct svc_xprt *xprt = rqstp->rq_xprt; + + __entry->netns_ino = xprt->xpt_net->ns.inum; + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->vers = rqstp->rq_vers; + __entry->proc = rqstp->rq_proc; + __assign_sockaddr(server, &xprt->xpt_local, xprt->xpt_locallen); + __assign_sockaddr(client, &xprt->xpt_remote, xprt->xpt_remotelen); + ), + TP_printk("xid=0x%08x vers=%u proc=%u", + __entry->xid, __entry->vers, __entry->proc + ) +); + +#define DEFINE_NFSD_XDR_ERR_EVENT(name) \ +DEFINE_EVENT(nfsd_xdr_err_class, nfsd_##name##_err, \ + TP_PROTO(const struct svc_rqst *rqstp), \ + TP_ARGS(rqstp)) + +DEFINE_NFSD_XDR_ERR_EVENT(garbage_args); +DEFINE_NFSD_XDR_ERR_EVENT(cant_encode); + +#define show_nfsd_may_flags(x) \ + __print_flags(x, "|", \ + { NFSD_MAY_EXEC, "EXEC" }, \ + { NFSD_MAY_WRITE, "WRITE" }, \ + { NFSD_MAY_READ, "READ" }, \ + { NFSD_MAY_SATTR, "SATTR" }, \ + { NFSD_MAY_TRUNC, "TRUNC" }, \ + { NFSD_MAY_LOCK, "LOCK" }, \ + { NFSD_MAY_OWNER_OVERRIDE, "OWNER_OVERRIDE" }, \ + { NFSD_MAY_LOCAL_ACCESS, "LOCAL_ACCESS" }, \ + { NFSD_MAY_BYPASS_GSS_ON_ROOT, "BYPASS_GSS_ON_ROOT" }, \ + { NFSD_MAY_NOT_BREAK_LEASE, "NOT_BREAK_LEASE" }, \ + { NFSD_MAY_BYPASS_GSS, "BYPASS_GSS" }, \ + { NFSD_MAY_READ_IF_EXEC, "READ_IF_EXEC" }, \ + { NFSD_MAY_64BIT_COOKIE, "64BIT_COOKIE" }) + TRACE_EVENT(nfsd_compound, - TP_PROTO(const struct svc_rqst *rqst, - u32 args_opcnt), - TP_ARGS(rqst, args_opcnt), + TP_PROTO( + const struct svc_rqst *rqst, + const char *tag, + u32 taglen, + u32 opcnt + ), + TP_ARGS(rqst, tag, taglen, opcnt), TP_STRUCT__entry( __field(u32, xid) - __field(u32, args_opcnt) + __field(u32, opcnt) + __string_len(tag, tag, taglen) ), TP_fast_assign( __entry->xid = be32_to_cpu(rqst->rq_xid); - __entry->args_opcnt = args_opcnt; + __entry->opcnt = opcnt; + __assign_str_len(tag, tag, taglen); ), - TP_printk("xid=0x%08x opcnt=%u", - __entry->xid, __entry->args_opcnt) + TP_printk("xid=0x%08x opcnt=%u tag=%s", + __entry->xid, __entry->opcnt, __get_str(tag) + ) ) TRACE_EVENT(nfsd_compound_status, @@ -50,17 +129,276 @@ TRACE_EVENT(nfsd_compound_status, __get_str(name), __entry->status) ) +TRACE_EVENT(nfsd_compound_decode_err, + TP_PROTO( + const struct svc_rqst *rqstp, + u32 args_opcnt, + u32 resp_opcnt, + u32 opnum, + __be32 status + ), + TP_ARGS(rqstp, args_opcnt, resp_opcnt, opnum, status), + TP_STRUCT__entry( + NFSD_TRACE_PROC_RES_FIELDS + + __field(u32, args_opcnt) + __field(u32, resp_opcnt) + __field(u32, opnum) + ), + TP_fast_assign( + NFSD_TRACE_PROC_RES_ASSIGNMENTS(status) + + __entry->args_opcnt = args_opcnt; + __entry->resp_opcnt = resp_opcnt; + __entry->opnum = opnum; + ), + TP_printk("op=%u/%u opnum=%u status=%lu", + __entry->resp_opcnt, __entry->args_opcnt, + __entry->opnum, __entry->status) +); + +TRACE_EVENT(nfsd_compound_encode_err, + TP_PROTO( + const struct svc_rqst *rqstp, + u32 opnum, + __be32 status + ), + TP_ARGS(rqstp, opnum, status), + TP_STRUCT__entry( + NFSD_TRACE_PROC_RES_FIELDS + + __field(u32, opnum) + ), + TP_fast_assign( + NFSD_TRACE_PROC_RES_ASSIGNMENTS(status) + + __entry->opnum = opnum; + ), + TP_printk("opnum=%u status=%lu", + __entry->opnum, __entry->status) +); + +#define show_fs_file_type(x) \ + __print_symbolic(x, \ + { S_IFLNK, "LNK" }, \ + { S_IFREG, "REG" }, \ + { S_IFDIR, "DIR" }, \ + { S_IFCHR, "CHR" }, \ + { S_IFBLK, "BLK" }, \ + { S_IFIFO, "FIFO" }, \ + { S_IFSOCK, "SOCK" }) + +TRACE_EVENT(nfsd_fh_verify, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct svc_fh *fhp, + umode_t type, + int access + ), + TP_ARGS(rqstp, fhp, type, access), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __sockaddr(server, rqstp->rq_xprt->xpt_remotelen) + __sockaddr(client, rqstp->rq_xprt->xpt_remotelen) + __field(u32, xid) + __field(u32, fh_hash) + __field(const void *, inode) + __field(unsigned long, type) + __field(unsigned long, access) + ), + TP_fast_assign( + __entry->netns_ino = SVC_NET(rqstp)->ns.inum; + __assign_sockaddr(server, &rqstp->rq_xprt->xpt_local, + rqstp->rq_xprt->xpt_locallen); + __assign_sockaddr(client, &rqstp->rq_xprt->xpt_remote, + rqstp->rq_xprt->xpt_remotelen); + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); + __entry->inode = d_inode(fhp->fh_dentry); + __entry->type = type; + __entry->access = access; + ), + TP_printk("xid=0x%08x fh_hash=0x%08x type=%s access=%s", + __entry->xid, __entry->fh_hash, + show_fs_file_type(__entry->type), + show_nfsd_may_flags(__entry->access) + ) +); + +TRACE_EVENT_CONDITION(nfsd_fh_verify_err, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct svc_fh *fhp, + umode_t type, + int access, + __be32 error + ), + TP_ARGS(rqstp, fhp, type, access, error), + TP_CONDITION(error), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __sockaddr(server, rqstp->rq_xprt->xpt_remotelen) + __sockaddr(client, rqstp->rq_xprt->xpt_remotelen) + __field(u32, xid) + __field(u32, fh_hash) + __field(const void *, inode) + __field(unsigned long, type) + __field(unsigned long, access) + __field(int, error) + ), + TP_fast_assign( + __entry->netns_ino = SVC_NET(rqstp)->ns.inum; + __assign_sockaddr(server, &rqstp->rq_xprt->xpt_local, + rqstp->rq_xprt->xpt_locallen); + __assign_sockaddr(client, &rqstp->rq_xprt->xpt_remote, + rqstp->rq_xprt->xpt_remotelen); + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); + __entry->inode = d_inode(fhp->fh_dentry); + __entry->type = type; + __entry->access = access; + __entry->error = be32_to_cpu(error); + ), + TP_printk("xid=0x%08x fh_hash=0x%08x type=%s access=%s error=%d", + __entry->xid, __entry->fh_hash, + show_fs_file_type(__entry->type), + show_nfsd_may_flags(__entry->access), + __entry->error + ) +); + +DECLARE_EVENT_CLASS(nfsd_fh_err_class, + TP_PROTO(struct svc_rqst *rqstp, + struct svc_fh *fhp, + int status), + TP_ARGS(rqstp, fhp, status), + TP_STRUCT__entry( + __field(u32, xid) + __field(u32, fh_hash) + __field(int, status) + ), + TP_fast_assign( + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); + __entry->status = status; + ), + TP_printk("xid=0x%08x fh_hash=0x%08x status=%d", + __entry->xid, __entry->fh_hash, + __entry->status) +) + +#define DEFINE_NFSD_FH_ERR_EVENT(name) \ +DEFINE_EVENT(nfsd_fh_err_class, nfsd_##name, \ + TP_PROTO(struct svc_rqst *rqstp, \ + struct svc_fh *fhp, \ + int status), \ + TP_ARGS(rqstp, fhp, status)) + +DEFINE_NFSD_FH_ERR_EVENT(set_fh_dentry_badexport); +DEFINE_NFSD_FH_ERR_EVENT(set_fh_dentry_badhandle); + +TRACE_EVENT(nfsd_exp_find_key, + TP_PROTO(const struct svc_expkey *key, + int status), + TP_ARGS(key, status), + TP_STRUCT__entry( + __field(int, fsidtype) + __array(u32, fsid, 6) + __string(auth_domain, key->ek_client->name) + __field(int, status) + ), + TP_fast_assign( + __entry->fsidtype = key->ek_fsidtype; + memcpy(__entry->fsid, key->ek_fsid, 4*6); + __assign_str(auth_domain, key->ek_client->name); + __entry->status = status; + ), + TP_printk("fsid=%x::%s domain=%s status=%d", + __entry->fsidtype, + __print_array(__entry->fsid, 6, 4), + __get_str(auth_domain), + __entry->status + ) +); + +TRACE_EVENT(nfsd_expkey_update, + TP_PROTO(const struct svc_expkey *key, const char *exp_path), + TP_ARGS(key, exp_path), + TP_STRUCT__entry( + __field(int, fsidtype) + __array(u32, fsid, 6) + __string(auth_domain, key->ek_client->name) + __string(path, exp_path) + __field(bool, cache) + ), + TP_fast_assign( + __entry->fsidtype = key->ek_fsidtype; + memcpy(__entry->fsid, key->ek_fsid, 4*6); + __assign_str(auth_domain, key->ek_client->name); + __assign_str(path, exp_path); + __entry->cache = !test_bit(CACHE_NEGATIVE, &key->h.flags); + ), + TP_printk("fsid=%x::%s domain=%s path=%s cache=%s", + __entry->fsidtype, + __print_array(__entry->fsid, 6, 4), + __get_str(auth_domain), + __get_str(path), + __entry->cache ? "pos" : "neg" + ) +); + +TRACE_EVENT(nfsd_exp_get_by_name, + TP_PROTO(const struct svc_export *key, + int status), + TP_ARGS(key, status), + TP_STRUCT__entry( + __string(path, key->ex_path.dentry->d_name.name) + __string(auth_domain, key->ex_client->name) + __field(int, status) + ), + TP_fast_assign( + __assign_str(path, key->ex_path.dentry->d_name.name); + __assign_str(auth_domain, key->ex_client->name); + __entry->status = status; + ), + TP_printk("path=%s domain=%s status=%d", + __get_str(path), + __get_str(auth_domain), + __entry->status + ) +); + +TRACE_EVENT(nfsd_export_update, + TP_PROTO(const struct svc_export *key), + TP_ARGS(key), + TP_STRUCT__entry( + __string(path, key->ex_path.dentry->d_name.name) + __string(auth_domain, key->ex_client->name) + __field(bool, cache) + ), + TP_fast_assign( + __assign_str(path, key->ex_path.dentry->d_name.name); + __assign_str(auth_domain, key->ex_client->name); + __entry->cache = !test_bit(CACHE_NEGATIVE, &key->h.flags); + ), + TP_printk("path=%s domain=%s cache=%s", + __get_str(path), + __get_str(auth_domain), + __entry->cache ? "pos" : "neg" + ) +); + DECLARE_EVENT_CLASS(nfsd_io_class, TP_PROTO(struct svc_rqst *rqstp, struct svc_fh *fhp, - loff_t offset, - unsigned long len), + u64 offset, + u32 len), TP_ARGS(rqstp, fhp, offset, len), TP_STRUCT__entry( __field(u32, xid) __field(u32, fh_hash) - __field(loff_t, offset) - __field(unsigned long, len) + __field(u64, offset) + __field(u32, len) ), TP_fast_assign( __entry->xid = be32_to_cpu(rqstp->rq_xid); @@ -68,7 +406,7 @@ DECLARE_EVENT_CLASS(nfsd_io_class, __entry->offset = offset; __entry->len = len; ), - TP_printk("xid=0x%08x fh_hash=0x%08x offset=%lld len=%lu", + TP_printk("xid=0x%08x fh_hash=0x%08x offset=%llu len=%u", __entry->xid, __entry->fh_hash, __entry->offset, __entry->len) ) @@ -77,8 +415,8 @@ DECLARE_EVENT_CLASS(nfsd_io_class, DEFINE_EVENT(nfsd_io_class, nfsd_##name, \ TP_PROTO(struct svc_rqst *rqstp, \ struct svc_fh *fhp, \ - loff_t offset, \ - unsigned long len), \ + u64 offset, \ + u32 len), \ TP_ARGS(rqstp, fhp, offset, len)) DEFINE_NFSD_IO_EVENT(read_start); @@ -125,10 +463,104 @@ DEFINE_EVENT(nfsd_err_class, nfsd_##name, \ DEFINE_NFSD_ERR_EVENT(read_err); DEFINE_NFSD_ERR_EVENT(write_err); +TRACE_EVENT(nfsd_dirent, + TP_PROTO(struct svc_fh *fhp, + u64 ino, + const char *name, + int namlen), + TP_ARGS(fhp, ino, name, namlen), + TP_STRUCT__entry( + __field(u32, fh_hash) + __field(u64, ino) + __string_len(name, name, namlen) + ), + TP_fast_assign( + __entry->fh_hash = fhp ? knfsd_fh_hash(&fhp->fh_handle) : 0; + __entry->ino = ino; + __assign_str_len(name, name, namlen) + ), + TP_printk("fh_hash=0x%08x ino=%llu name=%s", + __entry->fh_hash, __entry->ino, __get_str(name) + ) +) + +DECLARE_EVENT_CLASS(nfsd_copy_err_class, + TP_PROTO(struct svc_rqst *rqstp, + struct svc_fh *src_fhp, + loff_t src_offset, + struct svc_fh *dst_fhp, + loff_t dst_offset, + u64 count, + int status), + TP_ARGS(rqstp, src_fhp, src_offset, dst_fhp, dst_offset, count, status), + TP_STRUCT__entry( + __field(u32, xid) + __field(u32, src_fh_hash) + __field(loff_t, src_offset) + __field(u32, dst_fh_hash) + __field(loff_t, dst_offset) + __field(u64, count) + __field(int, status) + ), + TP_fast_assign( + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->src_fh_hash = knfsd_fh_hash(&src_fhp->fh_handle); + __entry->src_offset = src_offset; + __entry->dst_fh_hash = knfsd_fh_hash(&dst_fhp->fh_handle); + __entry->dst_offset = dst_offset; + __entry->count = count; + __entry->status = status; + ), + TP_printk("xid=0x%08x src_fh_hash=0x%08x src_offset=%lld " + "dst_fh_hash=0x%08x dst_offset=%lld " + "count=%llu status=%d", + __entry->xid, __entry->src_fh_hash, __entry->src_offset, + __entry->dst_fh_hash, __entry->dst_offset, + (unsigned long long)__entry->count, + __entry->status) +) + +#define DEFINE_NFSD_COPY_ERR_EVENT(name) \ +DEFINE_EVENT(nfsd_copy_err_class, nfsd_##name, \ + TP_PROTO(struct svc_rqst *rqstp, \ + struct svc_fh *src_fhp, \ + loff_t src_offset, \ + struct svc_fh *dst_fhp, \ + loff_t dst_offset, \ + u64 count, \ + int status), \ + TP_ARGS(rqstp, src_fhp, src_offset, dst_fhp, dst_offset, \ + count, status)) + +DEFINE_NFSD_COPY_ERR_EVENT(clone_file_range_err); + #include "state.h" #include "filecache.h" #include "vfs.h" +TRACE_EVENT(nfsd_delegret_wakeup, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct inode *inode, + long timeo + ), + TP_ARGS(rqstp, inode, timeo), + TP_STRUCT__entry( + __field(u32, xid) + __field(const void *, inode) + __field(long, timeo) + ), + TP_fast_assign( + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->inode = inode; + __entry->timeo = timeo; + ), + TP_printk("xid=0x%08x inode=%p%s", + __entry->xid, __entry->inode, + __entry->timeo == 0 ? " (timed out)" : "" + ) +); + DECLARE_EVENT_CLASS(nfsd_stateid_class, TP_PROTO(stateid_t *stp), TP_ARGS(stp), @@ -155,6 +587,7 @@ DECLARE_EVENT_CLASS(nfsd_stateid_class, DEFINE_EVENT(nfsd_stateid_class, nfsd_##name, \ TP_PROTO(stateid_t *stp), \ TP_ARGS(stp)) + DEFINE_STATEID_EVENT(layoutstate_alloc); DEFINE_STATEID_EVENT(layoutstate_unhash); DEFINE_STATEID_EVENT(layoutstate_free); @@ -166,32 +599,227 @@ DEFINE_STATEID_EVENT(layout_recall_done); DEFINE_STATEID_EVENT(layout_recall_fail); DEFINE_STATEID_EVENT(layout_recall_release); -TRACE_DEFINE_ENUM(NFSD_FILE_HASHED); -TRACE_DEFINE_ENUM(NFSD_FILE_PENDING); -TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_READ); -TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_WRITE); -TRACE_DEFINE_ENUM(NFSD_FILE_REFERENCED); +DEFINE_STATEID_EVENT(open); +DEFINE_STATEID_EVENT(deleg_read); +DEFINE_STATEID_EVENT(deleg_recall); + +DECLARE_EVENT_CLASS(nfsd_stateseqid_class, + TP_PROTO(u32 seqid, const stateid_t *stp), + TP_ARGS(seqid, stp), + TP_STRUCT__entry( + __field(u32, seqid) + __field(u32, cl_boot) + __field(u32, cl_id) + __field(u32, si_id) + __field(u32, si_generation) + ), + TP_fast_assign( + __entry->seqid = seqid; + __entry->cl_boot = stp->si_opaque.so_clid.cl_boot; + __entry->cl_id = stp->si_opaque.so_clid.cl_id; + __entry->si_id = stp->si_opaque.so_id; + __entry->si_generation = stp->si_generation; + ), + TP_printk("seqid=%u client %08x:%08x stateid %08x:%08x", + __entry->seqid, __entry->cl_boot, __entry->cl_id, + __entry->si_id, __entry->si_generation) +) + +#define DEFINE_STATESEQID_EVENT(name) \ +DEFINE_EVENT(nfsd_stateseqid_class, nfsd_##name, \ + TP_PROTO(u32 seqid, const stateid_t *stp), \ + TP_ARGS(seqid, stp)) + +DEFINE_STATESEQID_EVENT(preprocess); +DEFINE_STATESEQID_EVENT(open_confirm); + +DECLARE_EVENT_CLASS(nfsd_clientid_class, + TP_PROTO(const clientid_t *clid), + TP_ARGS(clid), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + ), + TP_fast_assign( + __entry->cl_boot = clid->cl_boot; + __entry->cl_id = clid->cl_id; + ), + TP_printk("client %08x:%08x", __entry->cl_boot, __entry->cl_id) +) + +#define DEFINE_CLIENTID_EVENT(name) \ +DEFINE_EVENT(nfsd_clientid_class, nfsd_clid_##name, \ + TP_PROTO(const clientid_t *clid), \ + TP_ARGS(clid)) + +DEFINE_CLIENTID_EVENT(expire_unconf); +DEFINE_CLIENTID_EVENT(reclaim_complete); +DEFINE_CLIENTID_EVENT(confirmed); +DEFINE_CLIENTID_EVENT(destroyed); +DEFINE_CLIENTID_EVENT(admin_expired); +DEFINE_CLIENTID_EVENT(replaced); +DEFINE_CLIENTID_EVENT(purged); +DEFINE_CLIENTID_EVENT(renew); +DEFINE_CLIENTID_EVENT(stale); + +DECLARE_EVENT_CLASS(nfsd_net_class, + TP_PROTO(const struct nfsd_net *nn), + TP_ARGS(nn), + TP_STRUCT__entry( + __field(unsigned long long, boot_time) + ), + TP_fast_assign( + __entry->boot_time = nn->boot_time; + ), + TP_printk("boot_time=%16llx", __entry->boot_time) +) + +#define DEFINE_NET_EVENT(name) \ +DEFINE_EVENT(nfsd_net_class, nfsd_##name, \ + TP_PROTO(const struct nfsd_net *nn), \ + TP_ARGS(nn)) +DEFINE_NET_EVENT(grace_start); +DEFINE_NET_EVENT(grace_complete); + +TRACE_EVENT(nfsd_writeverf_reset, + TP_PROTO( + const struct nfsd_net *nn, + const struct svc_rqst *rqstp, + int error + ), + TP_ARGS(nn, rqstp, error), + TP_STRUCT__entry( + __field(unsigned long long, boot_time) + __field(u32, xid) + __field(int, error) + __array(unsigned char, verifier, NFS4_VERIFIER_SIZE) + ), + TP_fast_assign( + __entry->boot_time = nn->boot_time; + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->error = error; + + /* avoid seqlock inside TP_fast_assign */ + memcpy(__entry->verifier, nn->writeverf, + NFS4_VERIFIER_SIZE); + ), + TP_printk("boot_time=%16llx xid=0x%08x error=%d new verifier=0x%s", + __entry->boot_time, __entry->xid, __entry->error, + __print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE) + ) +); + +TRACE_EVENT(nfsd_clid_cred_mismatch, + TP_PROTO( + const struct nfs4_client *clp, + const struct svc_rqst *rqstp + ), + TP_ARGS(clp, rqstp), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + __field(unsigned long, cl_flavor) + __field(unsigned long, new_flavor) + __sockaddr(addr, rqstp->rq_xprt->xpt_remotelen) + ), + TP_fast_assign( + __entry->cl_boot = clp->cl_clientid.cl_boot; + __entry->cl_id = clp->cl_clientid.cl_id; + __entry->cl_flavor = clp->cl_cred.cr_flavor; + __entry->new_flavor = rqstp->rq_cred.cr_flavor; + __assign_sockaddr(addr, &rqstp->rq_xprt->xpt_remote, + rqstp->rq_xprt->xpt_remotelen); + ), + TP_printk("client %08x:%08x flavor=%s, conflict=%s from addr=%pISpc", + __entry->cl_boot, __entry->cl_id, + show_nfsd_authflavor(__entry->cl_flavor), + show_nfsd_authflavor(__entry->new_flavor), + __get_sockaddr(addr) + ) +) + +TRACE_EVENT(nfsd_clid_verf_mismatch, + TP_PROTO( + const struct nfs4_client *clp, + const struct svc_rqst *rqstp, + const nfs4_verifier *verf + ), + TP_ARGS(clp, rqstp, verf), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + __array(unsigned char, cl_verifier, NFS4_VERIFIER_SIZE) + __array(unsigned char, new_verifier, NFS4_VERIFIER_SIZE) + __sockaddr(addr, rqstp->rq_xprt->xpt_remotelen) + ), + TP_fast_assign( + __entry->cl_boot = clp->cl_clientid.cl_boot; + __entry->cl_id = clp->cl_clientid.cl_id; + memcpy(__entry->cl_verifier, (void *)&clp->cl_verifier, + NFS4_VERIFIER_SIZE); + memcpy(__entry->new_verifier, (void *)verf, + NFS4_VERIFIER_SIZE); + __assign_sockaddr(addr, &rqstp->rq_xprt->xpt_remote, + rqstp->rq_xprt->xpt_remotelen); + ), + TP_printk("client %08x:%08x verf=0x%s, updated=0x%s from addr=%pISpc", + __entry->cl_boot, __entry->cl_id, + __print_hex_str(__entry->cl_verifier, NFS4_VERIFIER_SIZE), + __print_hex_str(__entry->new_verifier, NFS4_VERIFIER_SIZE), + __get_sockaddr(addr) + ) +); + +DECLARE_EVENT_CLASS(nfsd_clid_class, + TP_PROTO(const struct nfs4_client *clp), + TP_ARGS(clp), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + __field(unsigned long, flavor) + __array(unsigned char, verifier, NFS4_VERIFIER_SIZE) + __string_len(name, name, clp->cl_name.len) + ), + TP_fast_assign( + __entry->cl_boot = clp->cl_clientid.cl_boot; + __entry->cl_id = clp->cl_clientid.cl_id; + memcpy(__entry->addr, &clp->cl_addr, + sizeof(struct sockaddr_in6)); + __entry->flavor = clp->cl_cred.cr_flavor; + memcpy(__entry->verifier, (void *)&clp->cl_verifier, + NFS4_VERIFIER_SIZE); + __assign_str_len(name, clp->cl_name.data, clp->cl_name.len); + ), + TP_printk("addr=%pISpc name='%s' verifier=0x%s flavor=%s client=%08x:%08x", + __entry->addr, __get_str(name), + __print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE), + show_nfsd_authflavor(__entry->flavor), + __entry->cl_boot, __entry->cl_id) +); + +#define DEFINE_CLID_EVENT(name) \ +DEFINE_EVENT(nfsd_clid_class, nfsd_clid_##name, \ + TP_PROTO(const struct nfs4_client *clp), \ + TP_ARGS(clp)) + +DEFINE_CLID_EVENT(fresh); +DEFINE_CLID_EVENT(confirmed_r); + +/* + * from fs/nfsd/filecache.h + */ #define show_nf_flags(val) \ __print_flags(val, "|", \ { 1 << NFSD_FILE_HASHED, "HASHED" }, \ { 1 << NFSD_FILE_PENDING, "PENDING" }, \ - { 1 << NFSD_FILE_BREAK_READ, "BREAK_READ" }, \ - { 1 << NFSD_FILE_BREAK_WRITE, "BREAK_WRITE" }, \ { 1 << NFSD_FILE_REFERENCED, "REFERENCED"}) -/* FIXME: This should probably be fleshed out in the future. */ -#define show_nf_may(val) \ - __print_flags(val, "|", \ - { NFSD_MAY_READ, "READ" }, \ - { NFSD_MAY_WRITE, "WRITE" }, \ - { NFSD_MAY_NOT_BREAK_LEASE, "NOT_BREAK_LEASE" }) - DECLARE_EVENT_CLASS(nfsd_file_class, TP_PROTO(struct nfsd_file *nf), TP_ARGS(nf), TP_STRUCT__entry( - __field(unsigned int, nf_hashval) __field(void *, nf_inode) __field(int, nf_ref) __field(unsigned long, nf_flags) @@ -199,19 +827,17 @@ DECLARE_EVENT_CLASS(nfsd_file_class, __field(struct file *, nf_file) ), TP_fast_assign( - __entry->nf_hashval = nf->nf_hashval; __entry->nf_inode = nf->nf_inode; __entry->nf_ref = refcount_read(&nf->nf_ref); __entry->nf_flags = nf->nf_flags; __entry->nf_may = nf->nf_may; __entry->nf_file = nf->nf_file; ), - TP_printk("hash=0x%x inode=0x%p ref=%d flags=%s may=%s file=%p", - __entry->nf_hashval, + TP_printk("inode=%p ref=%d flags=%s may=%s nf_file=%p", __entry->nf_inode, __entry->nf_ref, show_nf_flags(__entry->nf_flags), - show_nf_may(__entry->nf_may), + show_nfsd_may_flags(__entry->nf_may), __entry->nf_file) ) @@ -220,34 +846,59 @@ DEFINE_EVENT(nfsd_file_class, name, \ TP_PROTO(struct nfsd_file *nf), \ TP_ARGS(nf)) -DEFINE_NFSD_FILE_EVENT(nfsd_file_alloc); DEFINE_NFSD_FILE_EVENT(nfsd_file_put_final); DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash); DEFINE_NFSD_FILE_EVENT(nfsd_file_put); -DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_release_locked); +DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_dispose); + +TRACE_EVENT(nfsd_file_alloc, + TP_PROTO( + const struct nfsd_file *nf + ), + TP_ARGS(nf), + TP_STRUCT__entry( + __field(const void *, nf_inode) + __field(unsigned long, nf_flags) + __field(unsigned long, nf_may) + __field(unsigned int, nf_ref) + ), + TP_fast_assign( + __entry->nf_inode = nf->nf_inode; + __entry->nf_flags = nf->nf_flags; + __entry->nf_ref = refcount_read(&nf->nf_ref); + __entry->nf_may = nf->nf_may; + ), + TP_printk("inode=%p ref=%u flags=%s may=%s", + __entry->nf_inode, __entry->nf_ref, + show_nf_flags(__entry->nf_flags), + show_nfsd_may_flags(__entry->nf_may) + ) +); TRACE_EVENT(nfsd_file_acquire, - TP_PROTO(struct svc_rqst *rqstp, unsigned int hash, - struct inode *inode, unsigned int may_flags, - struct nfsd_file *nf, __be32 status), + TP_PROTO( + const struct svc_rqst *rqstp, + const struct inode *inode, + unsigned int may_flags, + const struct nfsd_file *nf, + __be32 status + ), - TP_ARGS(rqstp, hash, inode, may_flags, nf, status), + TP_ARGS(rqstp, inode, may_flags, nf, status), TP_STRUCT__entry( __field(u32, xid) - __field(unsigned int, hash) - __field(void *, inode) - __field(unsigned int, may_flags) - __field(int, nf_ref) + __field(const void *, inode) + __field(unsigned long, may_flags) + __field(unsigned int, nf_ref) __field(unsigned long, nf_flags) - __field(unsigned char, nf_may) - __field(struct file *, nf_file) + __field(unsigned long, nf_may) + __field(const void *, nf_file) __field(u32, status) ), TP_fast_assign( __entry->xid = be32_to_cpu(rqstp->rq_xid); - __entry->hash = hash; __entry->inode = inode; __entry->may_flags = may_flags; __entry->nf_ref = nf ? refcount_read(&nf->nf_ref) : 0; @@ -257,39 +908,186 @@ TRACE_EVENT(nfsd_file_acquire, __entry->status = be32_to_cpu(status); ), - TP_printk("xid=0x%x hash=0x%x inode=0x%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=0x%p status=%u", - __entry->xid, __entry->hash, __entry->inode, - show_nf_may(__entry->may_flags), __entry->nf_ref, - show_nf_flags(__entry->nf_flags), - show_nf_may(__entry->nf_may), __entry->nf_file, - __entry->status) + TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p status=%u", + __entry->xid, __entry->inode, + show_nfsd_may_flags(__entry->may_flags), + __entry->nf_ref, show_nf_flags(__entry->nf_flags), + show_nfsd_may_flags(__entry->nf_may), + __entry->nf_file, __entry->status + ) ); +TRACE_EVENT(nfsd_file_create, + TP_PROTO( + const struct svc_rqst *rqstp, + unsigned int may_flags, + const struct nfsd_file *nf + ), + + TP_ARGS(rqstp, may_flags, nf), + + TP_STRUCT__entry( + __field(const void *, nf_inode) + __field(const void *, nf_file) + __field(unsigned long, may_flags) + __field(unsigned long, nf_flags) + __field(unsigned long, nf_may) + __field(unsigned int, nf_ref) + __field(u32, xid) + ), + + TP_fast_assign( + __entry->nf_inode = nf->nf_inode; + __entry->nf_file = nf->nf_file; + __entry->may_flags = may_flags; + __entry->nf_flags = nf->nf_flags; + __entry->nf_may = nf->nf_may; + __entry->nf_ref = refcount_read(&nf->nf_ref); + __entry->xid = be32_to_cpu(rqstp->rq_xid); + ), + + TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p", + __entry->xid, __entry->nf_inode, + show_nfsd_may_flags(__entry->may_flags), + __entry->nf_ref, show_nf_flags(__entry->nf_flags), + show_nfsd_may_flags(__entry->nf_may), __entry->nf_file + ) +); + +TRACE_EVENT(nfsd_file_insert_err, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct inode *inode, + unsigned int may_flags, + long error + ), + TP_ARGS(rqstp, inode, may_flags, error), + TP_STRUCT__entry( + __field(u32, xid) + __field(const void *, inode) + __field(unsigned long, may_flags) + __field(long, error) + ), + TP_fast_assign( + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->inode = inode; + __entry->may_flags = may_flags; + __entry->error = error; + ), + TP_printk("xid=0x%x inode=%p may_flags=%s error=%ld", + __entry->xid, __entry->inode, + show_nfsd_may_flags(__entry->may_flags), + __entry->error + ) +); + +TRACE_EVENT(nfsd_file_cons_err, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct inode *inode, + unsigned int may_flags, + const struct nfsd_file *nf + ), + TP_ARGS(rqstp, inode, may_flags, nf), + TP_STRUCT__entry( + __field(u32, xid) + __field(const void *, inode) + __field(unsigned long, may_flags) + __field(unsigned int, nf_ref) + __field(unsigned long, nf_flags) + __field(unsigned long, nf_may) + __field(const void *, nf_file) + ), + TP_fast_assign( + __entry->xid = be32_to_cpu(rqstp->rq_xid); + __entry->inode = inode; + __entry->may_flags = may_flags; + __entry->nf_ref = refcount_read(&nf->nf_ref); + __entry->nf_flags = nf->nf_flags; + __entry->nf_may = nf->nf_may; + __entry->nf_file = nf->nf_file; + ), + TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p", + __entry->xid, __entry->inode, + show_nfsd_may_flags(__entry->may_flags), __entry->nf_ref, + show_nf_flags(__entry->nf_flags), + show_nfsd_may_flags(__entry->nf_may), __entry->nf_file + ) +); + +TRACE_EVENT(nfsd_file_open, + TP_PROTO(struct nfsd_file *nf, __be32 status), + TP_ARGS(nf, status), + TP_STRUCT__entry( + __field(void *, nf_inode) /* cannot be dereferenced */ + __field(int, nf_ref) + __field(unsigned long, nf_flags) + __field(unsigned long, nf_may) + __field(void *, nf_file) /* cannot be dereferenced */ + ), + TP_fast_assign( + __entry->nf_inode = nf->nf_inode; + __entry->nf_ref = refcount_read(&nf->nf_ref); + __entry->nf_flags = nf->nf_flags; + __entry->nf_may = nf->nf_may; + __entry->nf_file = nf->nf_file; + ), + TP_printk("inode=%p ref=%d flags=%s may=%s file=%p", + __entry->nf_inode, + __entry->nf_ref, + show_nf_flags(__entry->nf_flags), + show_nfsd_may_flags(__entry->nf_may), + __entry->nf_file) +) + DECLARE_EVENT_CLASS(nfsd_file_search_class, - TP_PROTO(struct inode *inode, unsigned int hash, int found), - TP_ARGS(inode, hash, found), + TP_PROTO( + const struct inode *inode, + unsigned int count + ), + TP_ARGS(inode, count), TP_STRUCT__entry( - __field(struct inode *, inode) - __field(unsigned int, hash) - __field(int, found) + __field(const struct inode *, inode) + __field(unsigned int, count) ), TP_fast_assign( __entry->inode = inode; - __entry->hash = hash; - __entry->found = found; + __entry->count = count; ), - TP_printk("hash=0x%x inode=0x%p found=%d", __entry->hash, - __entry->inode, __entry->found) + TP_printk("inode=%p count=%u", + __entry->inode, __entry->count) ); #define DEFINE_NFSD_FILE_SEARCH_EVENT(name) \ DEFINE_EVENT(nfsd_file_search_class, name, \ - TP_PROTO(struct inode *inode, unsigned int hash, int found), \ - TP_ARGS(inode, hash, found)) + TP_PROTO( \ + const struct inode *inode, \ + unsigned int count \ + ), \ + TP_ARGS(inode, count)) DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode_sync); DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode); -DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_is_cached); + +TRACE_EVENT(nfsd_file_is_cached, + TP_PROTO( + const struct inode *inode, + int found + ), + TP_ARGS(inode, found), + TP_STRUCT__entry( + __field(const struct inode *, inode) + __field(int, found) + ), + TP_fast_assign( + __entry->inode = inode; + __entry->found = found; + ), + TP_printk("inode=%p is %scached", + __entry->inode, + __entry->found ? "" : "not " + ) +); TRACE_EVENT(nfsd_file_fsnotify_handle_event, TP_PROTO(struct inode *inode, u32 mask), @@ -306,10 +1104,412 @@ TRACE_EVENT(nfsd_file_fsnotify_handle_event, __entry->mode = inode->i_mode; __entry->mask = mask; ), - TP_printk("inode=0x%p nlink=%u mode=0%ho mask=0x%x", __entry->inode, + TP_printk("inode=%p nlink=%u mode=0%ho mask=0x%x", __entry->inode, __entry->nlink, __entry->mode, __entry->mask) ); +DECLARE_EVENT_CLASS(nfsd_file_gc_class, + TP_PROTO( + const struct nfsd_file *nf + ), + TP_ARGS(nf), + TP_STRUCT__entry( + __field(void *, nf_inode) + __field(void *, nf_file) + __field(int, nf_ref) + __field(unsigned long, nf_flags) + ), + TP_fast_assign( + __entry->nf_inode = nf->nf_inode; + __entry->nf_file = nf->nf_file; + __entry->nf_ref = refcount_read(&nf->nf_ref); + __entry->nf_flags = nf->nf_flags; + ), + TP_printk("inode=%p ref=%d nf_flags=%s nf_file=%p", + __entry->nf_inode, __entry->nf_ref, + show_nf_flags(__entry->nf_flags), + __entry->nf_file + ) +); + +#define DEFINE_NFSD_FILE_GC_EVENT(name) \ +DEFINE_EVENT(nfsd_file_gc_class, name, \ + TP_PROTO( \ + const struct nfsd_file *nf \ + ), \ + TP_ARGS(nf)) + +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_add); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_add_disposed); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del_disposed); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_in_use); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_writeback); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_referenced); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_hashed); +DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_disposed); + +DECLARE_EVENT_CLASS(nfsd_file_lruwalk_class, + TP_PROTO( + unsigned long removed, + unsigned long remaining + ), + TP_ARGS(removed, remaining), + TP_STRUCT__entry( + __field(unsigned long, removed) + __field(unsigned long, remaining) + ), + TP_fast_assign( + __entry->removed = removed; + __entry->remaining = remaining; + ), + TP_printk("%lu entries removed, %lu remaining", + __entry->removed, __entry->remaining) +); + +#define DEFINE_NFSD_FILE_LRUWALK_EVENT(name) \ +DEFINE_EVENT(nfsd_file_lruwalk_class, name, \ + TP_PROTO( \ + unsigned long removed, \ + unsigned long remaining \ + ), \ + TP_ARGS(removed, remaining)) + +DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_gc_removed); +DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_shrinker_removed); + +#include "cache.h" + +TRACE_DEFINE_ENUM(RC_DROPIT); +TRACE_DEFINE_ENUM(RC_REPLY); +TRACE_DEFINE_ENUM(RC_DOIT); + +#define show_drc_retval(x) \ + __print_symbolic(x, \ + { RC_DROPIT, "DROPIT" }, \ + { RC_REPLY, "REPLY" }, \ + { RC_DOIT, "DOIT" }) + +TRACE_EVENT(nfsd_drc_found, + TP_PROTO( + const struct nfsd_net *nn, + const struct svc_rqst *rqstp, + int result + ), + TP_ARGS(nn, rqstp, result), + TP_STRUCT__entry( + __field(unsigned long long, boot_time) + __field(unsigned long, result) + __field(u32, xid) + ), + TP_fast_assign( + __entry->boot_time = nn->boot_time; + __entry->result = result; + __entry->xid = be32_to_cpu(rqstp->rq_xid); + ), + TP_printk("boot_time=%16llx xid=0x%08x result=%s", + __entry->boot_time, __entry->xid, + show_drc_retval(__entry->result)) + +); + +TRACE_EVENT(nfsd_drc_mismatch, + TP_PROTO( + const struct nfsd_net *nn, + const struct svc_cacherep *key, + const struct svc_cacherep *rp + ), + TP_ARGS(nn, key, rp), + TP_STRUCT__entry( + __field(unsigned long long, boot_time) + __field(u32, xid) + __field(u32, cached) + __field(u32, ingress) + ), + TP_fast_assign( + __entry->boot_time = nn->boot_time; + __entry->xid = be32_to_cpu(key->c_key.k_xid); + __entry->cached = (__force u32)key->c_key.k_csum; + __entry->ingress = (__force u32)rp->c_key.k_csum; + ), + TP_printk("boot_time=%16llx xid=0x%08x cached-csum=0x%08x ingress-csum=0x%08x", + __entry->boot_time, __entry->xid, __entry->cached, + __entry->ingress) +); + +TRACE_EVENT(nfsd_cb_args, + TP_PROTO( + const struct nfs4_client *clp, + const struct nfs4_cb_conn *conn + ), + TP_ARGS(clp, conn), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + __field(u32, prog) + __field(u32, ident) + __sockaddr(addr, conn->cb_addrlen) + ), + TP_fast_assign( + __entry->cl_boot = clp->cl_clientid.cl_boot; + __entry->cl_id = clp->cl_clientid.cl_id; + __entry->prog = conn->cb_prog; + __entry->ident = conn->cb_ident; + __assign_sockaddr(addr, &conn->cb_addr, conn->cb_addrlen); + ), + TP_printk("addr=%pISpc client %08x:%08x prog=%u ident=%u", + __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id, + __entry->prog, __entry->ident) +); + +TRACE_EVENT(nfsd_cb_nodelegs, + TP_PROTO(const struct nfs4_client *clp), + TP_ARGS(clp), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + ), + TP_fast_assign( + __entry->cl_boot = clp->cl_clientid.cl_boot; + __entry->cl_id = clp->cl_clientid.cl_id; + ), + TP_printk("client %08x:%08x", __entry->cl_boot, __entry->cl_id) +) + +#define show_cb_state(val) \ + __print_symbolic(val, \ + { NFSD4_CB_UP, "UP" }, \ + { NFSD4_CB_UNKNOWN, "UNKNOWN" }, \ + { NFSD4_CB_DOWN, "DOWN" }, \ + { NFSD4_CB_FAULT, "FAULT"}) + +DECLARE_EVENT_CLASS(nfsd_cb_class, + TP_PROTO(const struct nfs4_client *clp), + TP_ARGS(clp), + TP_STRUCT__entry( + __field(unsigned long, state) + __field(u32, cl_boot) + __field(u32, cl_id) + __sockaddr(addr, clp->cl_cb_conn.cb_addrlen) + ), + TP_fast_assign( + __entry->state = clp->cl_cb_state; + __entry->cl_boot = clp->cl_clientid.cl_boot; + __entry->cl_id = clp->cl_clientid.cl_id; + __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr, + clp->cl_cb_conn.cb_addrlen) + ), + TP_printk("addr=%pISpc client %08x:%08x state=%s", + __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id, + show_cb_state(__entry->state)) +); + +#define DEFINE_NFSD_CB_EVENT(name) \ +DEFINE_EVENT(nfsd_cb_class, nfsd_cb_##name, \ + TP_PROTO(const struct nfs4_client *clp), \ + TP_ARGS(clp)) + +DEFINE_NFSD_CB_EVENT(state); +DEFINE_NFSD_CB_EVENT(probe); +DEFINE_NFSD_CB_EVENT(lost); +DEFINE_NFSD_CB_EVENT(shutdown); + +TRACE_DEFINE_ENUM(RPC_AUTH_NULL); +TRACE_DEFINE_ENUM(RPC_AUTH_UNIX); +TRACE_DEFINE_ENUM(RPC_AUTH_GSS); +TRACE_DEFINE_ENUM(RPC_AUTH_GSS_KRB5); +TRACE_DEFINE_ENUM(RPC_AUTH_GSS_KRB5I); +TRACE_DEFINE_ENUM(RPC_AUTH_GSS_KRB5P); + +#define show_nfsd_authflavor(val) \ + __print_symbolic(val, \ + { RPC_AUTH_NULL, "none" }, \ + { RPC_AUTH_UNIX, "sys" }, \ + { RPC_AUTH_GSS, "gss" }, \ + { RPC_AUTH_GSS_KRB5, "krb5" }, \ + { RPC_AUTH_GSS_KRB5I, "krb5i" }, \ + { RPC_AUTH_GSS_KRB5P, "krb5p" }) + +TRACE_EVENT(nfsd_cb_setup, + TP_PROTO(const struct nfs4_client *clp, + const char *netid, + rpc_authflavor_t authflavor + ), + TP_ARGS(clp, netid, authflavor), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + __field(unsigned long, authflavor) + __sockaddr(addr, clp->cl_cb_conn.cb_addrlen) + __array(unsigned char, netid, 8) + ), + TP_fast_assign( + __entry->cl_boot = clp->cl_clientid.cl_boot; + __entry->cl_id = clp->cl_clientid.cl_id; + strlcpy(__entry->netid, netid, sizeof(__entry->netid)); + __entry->authflavor = authflavor; + __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr, + clp->cl_cb_conn.cb_addrlen) + ), + TP_printk("addr=%pISpc client %08x:%08x proto=%s flavor=%s", + __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id, + __entry->netid, show_nfsd_authflavor(__entry->authflavor)) +); + +TRACE_EVENT(nfsd_cb_setup_err, + TP_PROTO( + const struct nfs4_client *clp, + long error + ), + TP_ARGS(clp, error), + TP_STRUCT__entry( + __field(long, error) + __field(u32, cl_boot) + __field(u32, cl_id) + __sockaddr(addr, clp->cl_cb_conn.cb_addrlen) + ), + TP_fast_assign( + __entry->error = error; + __entry->cl_boot = clp->cl_clientid.cl_boot; + __entry->cl_id = clp->cl_clientid.cl_id; + __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr, + clp->cl_cb_conn.cb_addrlen) + ), + TP_printk("addr=%pISpc client %08x:%08x error=%ld", + __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id, + __entry->error) +); + +TRACE_EVENT_CONDITION(nfsd_cb_recall, + TP_PROTO( + const struct nfs4_stid *stid + ), + TP_ARGS(stid), + TP_CONDITION(stid->sc_client), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + __field(u32, si_id) + __field(u32, si_generation) + __sockaddr(addr, stid->sc_client->cl_cb_conn.cb_addrlen) + ), + TP_fast_assign( + const stateid_t *stp = &stid->sc_stateid; + const struct nfs4_client *clp = stid->sc_client; + + __entry->cl_boot = stp->si_opaque.so_clid.cl_boot; + __entry->cl_id = stp->si_opaque.so_clid.cl_id; + __entry->si_id = stp->si_opaque.so_id; + __entry->si_generation = stp->si_generation; + __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr, + clp->cl_cb_conn.cb_addrlen) + ), + TP_printk("addr=%pISpc client %08x:%08x stateid %08x:%08x", + __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id, + __entry->si_id, __entry->si_generation) +); + +TRACE_EVENT(nfsd_cb_notify_lock, + TP_PROTO( + const struct nfs4_lockowner *lo, + const struct nfsd4_blocked_lock *nbl + ), + TP_ARGS(lo, nbl), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + __field(u32, fh_hash) + __sockaddr(addr, lo->lo_owner.so_client->cl_cb_conn.cb_addrlen) + ), + TP_fast_assign( + const struct nfs4_client *clp = lo->lo_owner.so_client; + + __entry->cl_boot = clp->cl_clientid.cl_boot; + __entry->cl_id = clp->cl_clientid.cl_id; + __entry->fh_hash = knfsd_fh_hash(&nbl->nbl_fh); + __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr, + clp->cl_cb_conn.cb_addrlen) + ), + TP_printk("addr=%pISpc client %08x:%08x fh_hash=0x%08x", + __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id, + __entry->fh_hash) +); + +TRACE_EVENT(nfsd_cb_offload, + TP_PROTO( + const struct nfs4_client *clp, + const stateid_t *stp, + const struct knfsd_fh *fh, + u64 count, + __be32 status + ), + TP_ARGS(clp, stp, fh, count, status), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + __field(u32, si_id) + __field(u32, si_generation) + __field(u32, fh_hash) + __field(int, status) + __field(u64, count) + __sockaddr(addr, clp->cl_cb_conn.cb_addrlen) + ), + TP_fast_assign( + __entry->cl_boot = stp->si_opaque.so_clid.cl_boot; + __entry->cl_id = stp->si_opaque.so_clid.cl_id; + __entry->si_id = stp->si_opaque.so_id; + __entry->si_generation = stp->si_generation; + __entry->fh_hash = knfsd_fh_hash(fh); + __entry->status = be32_to_cpu(status); + __entry->count = count; + __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr, + clp->cl_cb_conn.cb_addrlen) + ), + TP_printk("addr=%pISpc client %08x:%08x stateid %08x:%08x fh_hash=0x%08x count=%llu status=%d", + __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id, + __entry->si_id, __entry->si_generation, + __entry->fh_hash, __entry->count, __entry->status) +); + +DECLARE_EVENT_CLASS(nfsd_cb_done_class, + TP_PROTO( + const stateid_t *stp, + const struct rpc_task *task + ), + TP_ARGS(stp, task), + TP_STRUCT__entry( + __field(u32, cl_boot) + __field(u32, cl_id) + __field(u32, si_id) + __field(u32, si_generation) + __field(int, status) + ), + TP_fast_assign( + __entry->cl_boot = stp->si_opaque.so_clid.cl_boot; + __entry->cl_id = stp->si_opaque.so_clid.cl_id; + __entry->si_id = stp->si_opaque.so_id; + __entry->si_generation = stp->si_generation; + __entry->status = task->tk_status; + ), + TP_printk("client %08x:%08x stateid %08x:%08x status=%d", + __entry->cl_boot, __entry->cl_id, __entry->si_id, + __entry->si_generation, __entry->status + ) +); + +#define DEFINE_NFSD_CB_DONE_EVENT(name) \ +DEFINE_EVENT(nfsd_cb_done_class, name, \ + TP_PROTO( \ + const stateid_t *stp, \ + const struct rpc_task *task \ + ), \ + TP_ARGS(stp, task)) + +DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_recall_done); +DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done); +DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done); +DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done); + #endif /* _NFSD_TRACE_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 0aa02eb18bd3..f650afedd67f 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -26,20 +26,20 @@ #include <linux/xattr.h> #include <linux/jhash.h> #include <linux/ima.h> +#include <linux/pagemap.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/exportfs.h> #include <linux/writeback.h> #include <linux/security.h> -#ifdef CONFIG_NFSD_V3 #include "xdr3.h" -#endif /* CONFIG_NFSD_V3 */ #ifdef CONFIG_NFSD_V4 #include "../internal.h" #include "acl.h" #include "idmap.h" +#include "xdr4.h" #endif /* CONFIG_NFSD_V4 */ #include "nfsd.h" @@ -199,27 +199,13 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out_nfserr; } } else { - /* - * In the nfsd4_open() case, this may be held across - * subsequent open and delegation acquisition which may - * need to take the child's i_mutex: - */ - fh_lock_nested(fhp, I_MUTEX_PARENT); - dentry = lookup_one_len(name, dparent, len); + dentry = lookup_one_len_unlocked(name, dparent, len); host_err = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out_nfserr; if (nfsd_mountpoint(dentry, exp)) { - /* - * We don't need the i_mutex after all. It's - * still possible we could open this (regular - * files can be mountpoints too), but the - * i_mutex is just there to prevent renames of - * something that we might be about to delegate, - * and a mountpoint won't be renamed: - */ - fh_unlock(fhp); - if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) { + host_err = nfsd_cross_mnt(rqstp, &dentry, &exp); + if (host_err) { dput(dentry); goto out_nfserr; } @@ -234,7 +220,15 @@ out_nfserr: return nfserrno(host_err); } -/* +/** + * nfsd_lookup - look up a single path component for nfsd + * + * @rqstp: the request context + * @fhp: the file handle of the directory + * @name: the component name, or %NULL to look up parent + * @len: length of name to examine + * @resfh: pointer to pre-initialised filehandle to hold result. + * * Look up one component of a pathname. * N.B. After this call _both_ fhp and resfh need an fh_put * @@ -244,11 +238,11 @@ out_nfserr: * returned. Otherwise the covered directory is returned. * NOTE: this mountpoint crossing is not supported properly by all * clients and is explicitly disallowed for NFSv3 - * NeilBrown <neilb@cse.unsw.edu.au> + * */ __be32 nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, - unsigned int len, struct svc_fh *resfh) + unsigned int len, struct svc_fh *resfh) { struct svc_export *exp; struct dentry *dentry; @@ -306,6 +300,10 @@ commit_metadata(struct svc_fh *fhp) static void nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap) { + /* Ignore mode updates on symlinks */ + if (S_ISLNK(inode->i_mode)) + iap->ia_valid &= ~ATTR_MODE; + /* sanitize the mode change */ if (iap->ia_valid & ATTR_MODE) { iap->ia_mode &= S_IALLUGO; @@ -333,7 +331,6 @@ nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap) { struct inode *inode = d_inode(fhp->fh_dentry); - int host_err; if (iap->ia_size < inode->i_size) { __be32 err; @@ -343,37 +340,80 @@ nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp, if (err) return err; } + return nfserrno(get_write_access(inode)); +} - host_err = get_write_access(inode); - if (host_err) - goto out_nfserrno; +static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap) +{ + int host_err; - host_err = locks_verify_truncate(inode, NULL, iap->ia_size); - if (host_err) - goto out_put_write_access; - return 0; + if (iap->ia_valid & ATTR_SIZE) { + /* + * RFC5661, Section 18.30.4: + * Changing the size of a file with SETATTR indirectly + * changes the time_modify and change attributes. + * + * (and similar for the older RFCs) + */ + struct iattr size_attr = { + .ia_valid = ATTR_SIZE | ATTR_CTIME | ATTR_MTIME, + .ia_size = iap->ia_size, + }; -out_put_write_access: - put_write_access(inode); -out_nfserrno: - return nfserrno(host_err); + if (iap->ia_size < 0) + return -EFBIG; + + host_err = notify_change(&init_user_ns, dentry, &size_attr, NULL); + if (host_err) + return host_err; + iap->ia_valid &= ~ATTR_SIZE; + + /* + * Avoid the additional setattr call below if the only other + * attribute that the client sends is the mtime, as we update + * it as part of the size change above. + */ + if ((iap->ia_valid & ~ATTR_MTIME) == 0) + return 0; + } + + if (!iap->ia_valid) + return 0; + + iap->ia_valid |= ATTR_CTIME; + return notify_change(&init_user_ns, dentry, iap, NULL); } -/* - * Set various file attributes. After this call fhp needs an fh_put. +/** + * nfsd_setattr - Set various file attributes. + * @rqstp: controlling RPC transaction + * @fhp: filehandle of target + * @attr: attributes to set + * @check_guard: set to 1 if guardtime is a valid timestamp + * @guardtime: do not act if ctime.tv_sec does not match this timestamp + * + * This call may adjust the contents of @attr (in particular, this + * call may change the bits in the na_iattr.ia_valid field). + * + * Returns nfs_ok on success, otherwise an NFS status code is + * returned. Caller must release @fhp by calling fh_put in either + * case. */ __be32 -nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, +nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct nfsd_attrs *attr, int check_guard, time64_t guardtime) { struct dentry *dentry; struct inode *inode; + struct iattr *iap = attr->na_iattr; int accmode = NFSD_MAY_SATTR; umode_t ftype = 0; __be32 err; int host_err; bool get_write_count; bool size_change = (iap->ia_valid & ATTR_SIZE); + int retries; if (iap->ia_valid & ATTR_SIZE) { accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; @@ -409,13 +449,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, dentry = fhp->fh_dentry; inode = d_inode(dentry); - /* Ignore any mode updates on symlinks */ - if (S_ISLNK(inode->i_mode)) - iap->ia_valid &= ~ATTR_MODE; - - if (!iap->ia_valid) - return 0; - nfsd_sanitize_attrs(inode, iap); if (check_guard && guardtime != inode->i_ctime.tv_sec) @@ -434,39 +467,27 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, return err; } - fh_lock(fhp); - if (size_change) { - /* - * RFC5661, Section 18.30.4: - * Changing the size of a file with SETATTR indirectly - * changes the time_modify and change attributes. - * - * (and similar for the older RFCs) - */ - struct iattr size_attr = { - .ia_valid = ATTR_SIZE | ATTR_CTIME | ATTR_MTIME, - .ia_size = iap->ia_size, - }; - - host_err = notify_change(dentry, &size_attr, NULL); - if (host_err) - goto out_unlock; - iap->ia_valid &= ~ATTR_SIZE; - - /* - * Avoid the additional setattr call below if the only other - * attribute that the client sends is the mtime, as we update - * it as part of the size change above. - */ - if ((iap->ia_valid & ~ATTR_MTIME) == 0) - goto out_unlock; + inode_lock(inode); + for (retries = 1;;) { + host_err = __nfsd_setattr(dentry, iap); + if (host_err != -EAGAIN || !retries--) + break; + if (!nfsd_wait_for_delegreturn(rqstp, inode)) + break; } - - iap->ia_valid |= ATTR_CTIME; - host_err = notify_change(dentry, iap, NULL); - -out_unlock: - fh_unlock(fhp); + if (attr->na_seclabel && attr->na_seclabel->len) + attr->na_labelerr = security_inode_setsecctx(dentry, + attr->na_seclabel->data, attr->na_seclabel->len); + if (IS_ENABLED(CONFIG_FS_POSIX_ACL) && attr->na_pacl) + attr->na_aclerr = set_posix_acl(&init_user_ns, + inode, ACL_TYPE_ACCESS, + attr->na_pacl); + if (IS_ENABLED(CONFIG_FS_POSIX_ACL) && + !attr->na_aclerr && attr->na_dpacl && S_ISDIR(inode->i_mode)) + attr->na_aclerr = set_posix_acl(&init_user_ns, + inode, ACL_TYPE_DEFAULT, + attr->na_dpacl); + inode_unlock(inode); if (size_change) put_write_access(inode); out: @@ -499,46 +520,29 @@ int nfsd4_is_junction(struct dentry *dentry) return 0; if (!(inode->i_mode & S_ISVTX)) return 0; - if (vfs_getxattr(dentry, NFSD_JUNCTION_XATTR_NAME, NULL, 0) <= 0) + if (vfs_getxattr(&init_user_ns, dentry, NFSD_JUNCTION_XATTR_NAME, + NULL, 0) <= 0) return 0; return 1; } -#ifdef CONFIG_NFSD_V4_SECURITY_LABEL -__be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct xdr_netobj *label) -{ - __be32 error; - int host_error; - struct dentry *dentry; - - error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, NFSD_MAY_SATTR); - if (error) - return error; - dentry = fhp->fh_dentry; - - inode_lock(d_inode(dentry)); - host_error = security_inode_setsecctx(dentry, label->data, label->len); - inode_unlock(d_inode(dentry)); - return nfserrno(host_error); -} -#else -__be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, - struct xdr_netobj *label) +static struct nfsd4_compound_state *nfsd4_get_cstate(struct svc_rqst *rqstp) { - return nfserr_notsupp; + return &((struct nfsd4_compoundres *)rqstp->rq_resp)->cstate; } -#endif -__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos, - struct nfsd_file *nf_dst, u64 dst_pos, u64 count, bool sync) +__be32 nfsd4_clone_file_range(struct svc_rqst *rqstp, + struct nfsd_file *nf_src, u64 src_pos, + struct nfsd_file *nf_dst, u64 dst_pos, + u64 count, bool sync) { struct file *src = nf_src->nf_file; struct file *dst = nf_dst->nf_file; + errseq_t since; loff_t cloned; __be32 ret = 0; - down_write(&nf_dst->nf_rwsem); + since = READ_ONCE(dst->f_wb_err); cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0); if (cloned < 0) { ret = nfserrno(cloned); @@ -553,21 +557,32 @@ __be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos, int status = vfs_fsync_range(dst, dst_pos, dst_end, 0); if (!status) + status = filemap_check_wb_err(dst->f_mapping, since); + if (!status) status = commit_inode_metadata(file_inode(src)); if (status < 0) { - nfsd_reset_boot_verifier(net_generic(nf_dst->nf_net, - nfsd_net_id)); + struct nfsd_net *nn = net_generic(nf_dst->nf_net, + nfsd_net_id); + + trace_nfsd_clone_file_range_err(rqstp, + &nfsd4_get_cstate(rqstp)->save_fh, + src_pos, + &nfsd4_get_cstate(rqstp)->current_fh, + dst_pos, + count, status); + nfsd_reset_write_verifier(nn); + trace_nfsd_writeverf_reset(nn, rqstp, status); ret = nfserrno(status); } } out_err: - up_write(&nf_dst->nf_rwsem); return ret; } ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, u64 dst_pos, u64 count) { + ssize_t ret; /* * Limit copy to 4MB to prevent indefinitely blocking an nfsd @@ -578,7 +593,12 @@ ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, * limit like this and pipeline multiple COPY requests. */ count = min_t(u64, count, 1 << 22); - return vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0); + ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0); + + if (ret == -EOPNOTSUPP || ret == -EXDEV) + ret = generic_copy_file_range(src, src_pos, dst, dst_pos, + count, 0); + return ret; } __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp, @@ -598,7 +618,6 @@ __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp, } #endif /* defined(CONFIG_NFSD_V4) */ -#ifdef CONFIG_NFSD_V3 /* * Check server access rights to a file system object */ @@ -612,6 +631,12 @@ static struct accessmap nfs3_regaccess[] = { { NFS3_ACCESS_MODIFY, NFSD_MAY_WRITE|NFSD_MAY_TRUNC }, { NFS3_ACCESS_EXTEND, NFSD_MAY_WRITE }, +#ifdef CONFIG_NFSD_V4 + { NFS4_ACCESS_XAREAD, NFSD_MAY_READ }, + { NFS4_ACCESS_XAWRITE, NFSD_MAY_WRITE }, + { NFS4_ACCESS_XALIST, NFSD_MAY_READ }, +#endif + { 0, 0 } }; @@ -622,6 +647,12 @@ static struct accessmap nfs3_diraccess[] = { { NFS3_ACCESS_EXTEND, NFSD_MAY_EXEC|NFSD_MAY_WRITE }, { NFS3_ACCESS_DELETE, NFSD_MAY_REMOVE }, +#ifdef CONFIG_NFSD_V4 + { NFS4_ACCESS_XAREAD, NFSD_MAY_READ }, + { NFS4_ACCESS_XAWRITE, NFSD_MAY_WRITE }, + { NFS4_ACCESS_XALIST, NFSD_MAY_READ }, +#endif + { 0, 0 } }; @@ -698,7 +729,6 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor out: return error; } -#endif /* CONFIG_NFSD_V3 */ int nfsd_open_break_lease(struct inode *inode, int access) { @@ -731,19 +761,9 @@ __nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, path.dentry = fhp->fh_dentry; inode = d_inode(path.dentry); - /* Disallow write access to files with the append-only bit set - * or any access when mandatory locking enabled - */ err = nfserr_perm; if (IS_APPEND(inode) && (may_flags & NFSD_MAY_WRITE)) goto out; - /* - * We must ignore files (but only files) which might have mandatory - * locks on them because there is no way to know if the accesser has - * the lock. - */ - if (S_ISREG((inode)->i_mode) && mandatory_lock(inode)) - goto out; if (!inode->i_fop) goto out; @@ -788,6 +808,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int may_flags, struct file **filp) { __be32 err; + bool retried = false; validate_process_creds(); /* @@ -803,21 +824,37 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, */ if (type == S_IFREG) may_flags |= NFSD_MAY_OWNER_OVERRIDE; +retry: err = fh_verify(rqstp, fhp, type, may_flags); - if (!err) + if (!err) { err = __nfsd_open(rqstp, fhp, type, may_flags, filp); + if (err == nfserr_stale && !retried) { + retried = true; + fh_put(fhp); + goto retry; + } + } validate_process_creds(); return err; } +/** + * nfsd_open_verified - Open a regular file for the filecache + * @rqstp: RPC request + * @fhp: NFS filehandle of the file to open + * @may_flags: internal permission flags + * @filp: OUT: open "struct file *" + * + * Returns an nfsstat value in network byte order. + */ __be32 -nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, - int may_flags, struct file **filp) +nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, int may_flags, + struct file **filp) { __be32 err; validate_process_creds(); - err = __nfsd_open(rqstp, fhp, type, may_flags, filp); + err = __nfsd_open(rqstp, fhp, S_IFREG, may_flags, filp); validate_process_creds(); return err; } @@ -832,28 +869,16 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct splice_desc *sd) { struct svc_rqst *rqstp = sd->u.data; - struct page **pp = rqstp->rq_next_page; - struct page *page = buf->page; - size_t size; - - size = sd->len; - - if (rqstp->rq_res.page_len == 0) { - get_page(page); - put_page(*rqstp->rq_next_page); - *(rqstp->rq_next_page++) = page; - rqstp->rq_res.page_base = buf->offset; - rqstp->rq_res.page_len = size; - } else if (page != pp[-1]) { - get_page(page); - if (*rqstp->rq_next_page) - put_page(*rqstp->rq_next_page); - *(rqstp->rq_next_page++) = page; - rqstp->rq_res.page_len += size; - } else - rqstp->rq_res.page_len += size; - - return size; + struct page *page = buf->page; // may be a compound one + unsigned offset = buf->offset; + + page += offset / PAGE_SIZE; + for (int i = sd->len; i > 0; i -= PAGE_SIZE) + svc_rqst_replace_page(rqstp, page++); + if (rqstp->rq_res.page_len == 0) // first call + rqstp->rq_res.page_base = offset % PAGE_SIZE; + rqstp->rq_res.page_len += sd->len; + return sd->len; } static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe, @@ -877,7 +902,7 @@ static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned long *count, u32 *eof, ssize_t host_err) { if (host_err >= 0) { - nfsdstats.io_read += host_err; + nfsd_stats_io_read_add(fhp->fh_export, host_err); *eof = nfsd_eof_on_read(file, offset, host_err, *count); *count = host_err; fsnotify_access(file); @@ -965,26 +990,38 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, unsigned long *cnt, int stable, __be32 *verf) { + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct file *file = nf->nf_file; + struct super_block *sb = file_inode(file)->i_sb; struct svc_export *exp; struct iov_iter iter; + errseq_t since; __be32 nfserr; int host_err; int use_wgather; loff_t pos = offset; + unsigned long exp_op_flags = 0; unsigned int pflags = current->flags; rwf_t flags = 0; + bool restore_flags = false; trace_nfsd_write_opened(rqstp, fhp, offset, *cnt); - if (test_bit(RQ_LOCAL, &rqstp->rq_flags)) + if (sb->s_export_op) + exp_op_flags = sb->s_export_op->flags; + + if (test_bit(RQ_LOCAL, &rqstp->rq_flags) && + !(exp_op_flags & EXPORT_OP_REMOTE_FS)) { /* - * We want less throttling in balance_dirty_pages() - * and shrink_inactive_list() so that nfs to + * We want throttling in balance_dirty_pages() + * and shrink_inactive_list() to only consider + * the backingdev we are writing to, so that nfs to * localhost doesn't cause nfsd to lock up due to all * the client's dirty pages or its congested queue. */ - current->flags |= PF_LESS_THROTTLE; + current->flags |= PF_LOCAL_THROTTLE; + restore_flags = true; + } exp = fhp->fh_export; use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp); @@ -996,36 +1033,28 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, flags |= RWF_SYNC; iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt); - if (flags & RWF_SYNC) { - down_write(&nf->nf_rwsem); - host_err = vfs_iter_write(file, &iter, &pos, flags); - if (host_err < 0) - nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp), - nfsd_net_id)); - up_write(&nf->nf_rwsem); - } else { - down_read(&nf->nf_rwsem); - if (verf) - nfsd_copy_boot_verifier(verf, - net_generic(SVC_NET(rqstp), - nfsd_net_id)); - host_err = vfs_iter_write(file, &iter, &pos, flags); - up_read(&nf->nf_rwsem); - } + since = READ_ONCE(file->f_wb_err); + if (verf) + nfsd_copy_write_verifier(verf, nn); + host_err = vfs_iter_write(file, &iter, &pos, flags); if (host_err < 0) { - nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp), - nfsd_net_id)); + nfsd_reset_write_verifier(nn); + trace_nfsd_writeverf_reset(nn, rqstp, host_err); goto out_nfserr; } *cnt = host_err; - nfsdstats.io_write += *cnt; + nfsd_stats_io_write_add(exp, *cnt); fsnotify_modify(file); + host_err = filemap_check_wb_err(file->f_mapping, since); + if (host_err < 0) + goto out_nfserr; if (stable && use_wgather) { host_err = wait_for_concurrent_writes(file); - if (host_err < 0) - nfsd_reset_boot_verifier(net_generic(SVC_NET(rqstp), - nfsd_net_id)); + if (host_err < 0) { + nfsd_reset_write_verifier(nn); + trace_nfsd_writeverf_reset(nn, rqstp, host_err); + } } out_nfserr: @@ -1036,8 +1065,8 @@ out_nfserr: trace_nfsd_write_err(rqstp, fhp, offset, host_err); nfserr = nfserrno(host_err); } - if (test_bit(RQ_LOCAL, &rqstp->rq_flags)) - current_restore_flags(pflags, PF_LESS_THROTTLE); + if (restore_flags) + current_restore_flags(pflags, PF_LOCAL_THROTTLE); return nfserr; } @@ -1099,73 +1128,105 @@ out: return err; } -#ifdef CONFIG_NFSD_V3 -/* - * Commit all pending writes to stable storage. +/** + * nfsd_commit - Commit pending writes to stable storage + * @rqstp: RPC request being processed + * @fhp: NFS filehandle + * @offset: raw offset from beginning of file + * @count: raw count of bytes to sync + * @verf: filled in with the server's current write verifier * - * Note: we only guarantee that data that lies within the range specified - * by the 'offset' and 'count' parameters will be synced. + * Note: we guarantee that data that lies within the range specified + * by the 'offset' and 'count' parameters will be synced. The server + * is permitted to sync data that lies outside this range at the + * same time. * * Unfortunately we cannot lock the file to make sure we return full WCC * data to the client, as locking happens lower down in the filesystem. + * + * Return values: + * An nfsstat value in network byte order. */ __be32 -nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, - loff_t offset, unsigned long count, __be32 *verf) +nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, u64 offset, + u32 count, __be32 *verf) { + u64 maxbytes; + loff_t start, end; + struct nfsd_net *nn; struct nfsd_file *nf; - loff_t end = LLONG_MAX; - __be32 err = nfserr_inval; - - if (offset < 0) - goto out; - if (count != 0) { - end = offset + (loff_t)count - 1; - if (end < offset) - goto out; - } + __be32 err; err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf); if (err) goto out; + + /* + * Convert the client-provided (offset, count) range to a + * (start, end) range. If the client-provided range falls + * outside the maximum file size of the underlying FS, + * clamp the sync range appropriately. + */ + start = 0; + end = LLONG_MAX; + maxbytes = (u64)fhp->fh_dentry->d_sb->s_maxbytes; + if (offset < maxbytes) { + start = offset; + if (count && (offset + count - 1 < maxbytes)) + end = offset + count - 1; + } + + nn = net_generic(nf->nf_net, nfsd_net_id); if (EX_ISSYNC(fhp->fh_export)) { + errseq_t since = READ_ONCE(nf->nf_file->f_wb_err); int err2; - down_write(&nf->nf_rwsem); - err2 = vfs_fsync_range(nf->nf_file, offset, end, 0); + err2 = vfs_fsync_range(nf->nf_file, start, end, 0); switch (err2) { case 0: - nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net, - nfsd_net_id)); + nfsd_copy_write_verifier(verf, nn); + err2 = filemap_check_wb_err(nf->nf_file->f_mapping, + since); + err = nfserrno(err2); break; case -EINVAL: err = nfserr_notsupp; break; default: + nfsd_reset_write_verifier(nn); + trace_nfsd_writeverf_reset(nn, rqstp, err2); err = nfserrno(err2); - nfsd_reset_boot_verifier(net_generic(nf->nf_net, - nfsd_net_id)); } - up_write(&nf->nf_rwsem); } else - nfsd_copy_boot_verifier(verf, net_generic(nf->nf_net, - nfsd_net_id)); + nfsd_copy_write_verifier(verf, nn); nfsd_file_put(nf); out: return err; } -#endif /* CONFIG_NFSD_V3 */ -static __be32 -nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp, - struct iattr *iap) +/** + * nfsd_create_setattr - Set a created file's attributes + * @rqstp: RPC transaction being executed + * @fhp: NFS filehandle of parent directory + * @resfhp: NFS filehandle of new object + * @attrs: requested attributes of new object + * + * Returns nfs_ok on success, or an nfsstat in network byte order. + */ +__be32 +nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct svc_fh *resfhp, struct nfsd_attrs *attrs) { + struct iattr *iap = attrs->na_iattr; + __be32 status; + /* - * Mode has already been set earlier in create: + * Mode has already been set by file creation. */ iap->ia_valid &= ~ATTR_MODE; + /* * Setting uid/gid works only for root. Irix appears to * send along the gid on create when it tries to implement @@ -1173,10 +1234,31 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp, */ if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) iap->ia_valid &= ~(ATTR_UID|ATTR_GID); + + /* + * Callers expect new file metadata to be committed even + * if the attributes have not changed. + */ if (iap->ia_valid) - return nfsd_setattr(rqstp, resfhp, iap, 0, (time64_t)0); - /* Callers expect file metadata to be committed here */ - return nfserrno(commit_metadata(resfhp)); + status = nfsd_setattr(rqstp, resfhp, attrs, 0, (time64_t)0); + else + status = nfserrno(commit_metadata(resfhp)); + + /* + * Transactional filesystems had a chance to commit changes + * for both parent and child simultaneously making the + * following commit_metadata a noop in many cases. + */ + if (!status) + status = nfserrno(commit_metadata(fhp)); + + /* + * Update the new filehandle to pick up the new attributes. + */ + if (!status) + status = fh_update(resfhp); + + return status; } /* HPUX client sometimes creates a file in mode 000, and sets size to 0. @@ -1197,26 +1279,19 @@ nfsd_check_ignore_resizing(struct iattr *iap) /* The parent directory should already be locked: */ __be32 nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, - char *fname, int flen, struct iattr *iap, - int type, dev_t rdev, struct svc_fh *resfhp) + struct nfsd_attrs *attrs, + int type, dev_t rdev, struct svc_fh *resfhp) { struct dentry *dentry, *dchild; struct inode *dirp; + struct iattr *iap = attrs->na_iattr; __be32 err; - __be32 err2; int host_err; dentry = fhp->fh_dentry; dirp = d_inode(dentry); dchild = dget(resfhp->fh_dentry); - if (!fhp->fh_locked) { - WARN_ONCE(1, "nfsd_create: parent %pd2 not locked!\n", - dentry); - err = nfserr_io; - goto out; - } - err = nfsd_permission(rqstp, fhp->fh_export, dentry, NFSD_MAY_CREATE); if (err) goto out; @@ -1225,16 +1300,19 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, iap->ia_mode = 0; iap->ia_mode = (iap->ia_mode & S_IALLUGO) | type; + if (!IS_POSIXACL(dirp)) + iap->ia_mode &= ~current_umask(); + err = 0; host_err = 0; switch (type) { case S_IFREG: - host_err = vfs_create(dirp, dchild, iap->ia_mode, true); + host_err = vfs_create(&init_user_ns, dirp, dchild, iap->ia_mode, true); if (!host_err) nfsd_check_ignore_resizing(iap); break; case S_IFDIR: - host_err = vfs_mkdir(dirp, dchild, iap->ia_mode); + host_err = vfs_mkdir(&init_user_ns, dirp, dchild, iap->ia_mode); if (!host_err && unlikely(d_unhashed(dchild))) { struct dentry *d; d = lookup_one_len(dchild->d_name.name, @@ -1262,7 +1340,8 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, case S_IFBLK: case S_IFIFO: case S_IFSOCK: - host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); + host_err = vfs_mknod(&init_user_ns, dirp, dchild, + iap->ia_mode, rdev); break; default: printk(KERN_WARNING "nfsd: bad file type %o in nfsd_create\n", @@ -1272,22 +1351,8 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, if (host_err < 0) goto out_nfserr; - err = nfsd_create_setattr(rqstp, resfhp, iap); + err = nfsd_create_setattr(rqstp, fhp, resfhp, attrs); - /* - * nfsd_create_setattr already committed the child. Transactional - * filesystems had a chance to commit changes for both parent and - * child simultaneously making the following commit_metadata a - * noop. - */ - err2 = nfserrno(commit_metadata(fhp)); - if (err2) - err = err2; - /* - * Update the file handle to get the new inode info. - */ - if (!err) - err = fh_update(resfhp); out: dput(dchild); return err; @@ -1305,8 +1370,8 @@ out_nfserr: */ __be32 nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, - char *fname, int flen, struct iattr *iap, - int type, dev_t rdev, struct svc_fh *resfhp) + char *fname, int flen, struct nfsd_attrs *attrs, + int type, dev_t rdev, struct svc_fh *resfhp) { struct dentry *dentry, *dchild = NULL; __be32 err; @@ -1325,11 +1390,13 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, if (host_err) return nfserrno(host_err); - fh_lock_nested(fhp, I_MUTEX_PARENT); + inode_lock_nested(dentry->d_inode, I_MUTEX_PARENT); dchild = lookup_one_len(fname, dentry, flen); host_err = PTR_ERR(dchild); - if (IS_ERR(dchild)) - return nfserrno(host_err); + if (IS_ERR(dchild)) { + err = nfserrno(host_err); + goto out_unlock; + } err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); /* * We unconditionally drop our ref to dchild as fh_compose will have @@ -1337,175 +1404,14 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, */ dput(dchild); if (err) - return err; - return nfsd_create_locked(rqstp, fhp, fname, flen, iap, type, - rdev, resfhp); -} - -#ifdef CONFIG_NFSD_V3 - -/* - * NFSv3 and NFSv4 version of nfsd_create - */ -__be32 -do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, - char *fname, int flen, struct iattr *iap, - struct svc_fh *resfhp, int createmode, u32 *verifier, - bool *truncp, bool *created) -{ - struct dentry *dentry, *dchild = NULL; - struct inode *dirp; - __be32 err; - int host_err; - __u32 v_mtime=0, v_atime=0; - - err = nfserr_perm; - if (!flen) - goto out; - err = nfserr_exist; - if (isdotent(fname, flen)) - goto out; - if (!(iap->ia_valid & ATTR_MODE)) - iap->ia_mode = 0; - err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC); - if (err) - goto out; - - dentry = fhp->fh_dentry; - dirp = d_inode(dentry); - - host_err = fh_want_write(fhp); - if (host_err) - goto out_nfserr; - - fh_lock_nested(fhp, I_MUTEX_PARENT); - - /* - * Compose the response file handle. - */ - dchild = lookup_one_len(fname, dentry, flen); - host_err = PTR_ERR(dchild); - if (IS_ERR(dchild)) - goto out_nfserr; - - /* If file doesn't exist, check for permissions to create one */ - if (d_really_is_negative(dchild)) { - err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE); - if (err) - goto out; - } - - err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); - if (err) - goto out; - - if (nfsd_create_is_exclusive(createmode)) { - /* solaris7 gets confused (bugid 4218508) if these have - * the high bit set, so just clear the high bits. If this is - * ever changed to use different attrs for storing the - * verifier, then do_open_lookup() will also need to be fixed - * accordingly. - */ - v_mtime = verifier[0]&0x7fffffff; - v_atime = verifier[1]&0x7fffffff; - } - - if (d_really_is_positive(dchild)) { - err = 0; - - switch (createmode) { - case NFS3_CREATE_UNCHECKED: - if (! d_is_reg(dchild)) - goto out; - else if (truncp) { - /* in nfsv4, we need to treat this case a little - * differently. we don't want to truncate the - * file now; this would be wrong if the OPEN - * fails for some other reason. furthermore, - * if the size is nonzero, we should ignore it - * according to spec! - */ - *truncp = (iap->ia_valid & ATTR_SIZE) && !iap->ia_size; - } - else { - iap->ia_valid &= ATTR_SIZE; - goto set_attr; - } - break; - case NFS3_CREATE_EXCLUSIVE: - if ( d_inode(dchild)->i_mtime.tv_sec == v_mtime - && d_inode(dchild)->i_atime.tv_sec == v_atime - && d_inode(dchild)->i_size == 0 ) { - if (created) - *created = true; - break; - } - /* fall through */ - case NFS4_CREATE_EXCLUSIVE4_1: - if ( d_inode(dchild)->i_mtime.tv_sec == v_mtime - && d_inode(dchild)->i_atime.tv_sec == v_atime - && d_inode(dchild)->i_size == 0 ) { - if (created) - *created = true; - goto set_attr; - } - /* fall through */ - case NFS3_CREATE_GUARDED: - err = nfserr_exist; - } - fh_drop_write(fhp); - goto out; - } - - host_err = vfs_create(dirp, dchild, iap->ia_mode, true); - if (host_err < 0) { - fh_drop_write(fhp); - goto out_nfserr; - } - if (created) - *created = true; - - nfsd_check_ignore_resizing(iap); - - if (nfsd_create_is_exclusive(createmode)) { - /* Cram the verifier into atime/mtime */ - iap->ia_valid = ATTR_MTIME|ATTR_ATIME - | ATTR_MTIME_SET|ATTR_ATIME_SET; - /* XXX someone who knows this better please fix it for nsec */ - iap->ia_mtime.tv_sec = v_mtime; - iap->ia_atime.tv_sec = v_atime; - iap->ia_mtime.tv_nsec = 0; - iap->ia_atime.tv_nsec = 0; - } - - set_attr: - err = nfsd_create_setattr(rqstp, resfhp, iap); - - /* - * nfsd_create_setattr already committed the child - * (and possibly also the parent). - */ - if (!err) - err = nfserrno(commit_metadata(fhp)); - - /* - * Update the filehandle to get the new inode info. - */ - if (!err) - err = fh_update(resfhp); - - out: - fh_unlock(fhp); - if (dchild && !IS_ERR(dchild)) - dput(dchild); - fh_drop_write(fhp); - return err; - - out_nfserr: - err = nfserrno(host_err); - goto out; + goto out_unlock; + fh_fill_pre_attrs(fhp); + err = nfsd_create_locked(rqstp, fhp, attrs, type, rdev, resfhp); + fh_fill_post_attrs(fhp); +out_unlock: + inode_unlock(dentry->d_inode); + return err; } -#endif /* CONFIG_NFSD_V3 */ /* * Read a symlink. On entry, *lenp must contain the maximum path length that @@ -1545,15 +1451,25 @@ nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp) return 0; } -/* - * Create a symlink and look up its inode +/** + * nfsd_symlink - Create a symlink and look up its inode + * @rqstp: RPC transaction being executed + * @fhp: NFS filehandle of parent directory + * @fname: filename of the new symlink + * @flen: length of @fname + * @path: content of the new symlink (NUL-terminated) + * @attrs: requested attributes of new object + * @resfhp: NFS filehandle of new object + * * N.B. After this call _both_ fhp and resfhp need an fh_put + * + * Returns nfs_ok on success, or an nfsstat in network byte order. */ __be32 nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, - char *fname, int flen, - char *path, - struct svc_fh *resfhp) + char *fname, int flen, + char *path, struct nfsd_attrs *attrs, + struct svc_fh *resfhp) { struct dentry *dentry, *dnew; __be32 err, cerr; @@ -1571,33 +1487,35 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out; host_err = fh_want_write(fhp); - if (host_err) - goto out_nfserr; + if (host_err) { + err = nfserrno(host_err); + goto out; + } - fh_lock(fhp); dentry = fhp->fh_dentry; + inode_lock_nested(dentry->d_inode, I_MUTEX_PARENT); dnew = lookup_one_len(fname, dentry, flen); - host_err = PTR_ERR(dnew); - if (IS_ERR(dnew)) - goto out_nfserr; - - host_err = vfs_symlink(d_inode(dentry), dnew, path); + if (IS_ERR(dnew)) { + err = nfserrno(PTR_ERR(dnew)); + inode_unlock(dentry->d_inode); + goto out_drop_write; + } + fh_fill_pre_attrs(fhp); + host_err = vfs_symlink(&init_user_ns, d_inode(dentry), dnew, path); err = nfserrno(host_err); + cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp); + if (!err) + nfsd_create_setattr(rqstp, fhp, resfhp, attrs); + fh_fill_post_attrs(fhp); + inode_unlock(dentry->d_inode); if (!err) err = nfserrno(commit_metadata(fhp)); - fh_unlock(fhp); - - fh_drop_write(fhp); - - cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp); dput(dnew); if (err==0) err = cerr; +out_drop_write: + fh_drop_write(fhp); out: return err; - -out_nfserr: - err = nfserrno(host_err); - goto out; } /* @@ -1635,21 +1553,25 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, goto out; } - fh_lock_nested(ffhp, I_MUTEX_PARENT); ddir = ffhp->fh_dentry; dirp = d_inode(ddir); + inode_lock_nested(dirp, I_MUTEX_PARENT); dnew = lookup_one_len(name, ddir, len); - host_err = PTR_ERR(dnew); - if (IS_ERR(dnew)) - goto out_nfserr; + if (IS_ERR(dnew)) { + err = nfserrno(PTR_ERR(dnew)); + goto out_unlock; + } dold = tfhp->fh_dentry; err = nfserr_noent; if (d_really_is_negative(dold)) goto out_dput; - host_err = vfs_link(dold, dirp, dnew, NULL); + fh_fill_pre_attrs(ffhp); + host_err = vfs_link(dold, &init_user_ns, dirp, dnew, NULL); + fh_fill_post_attrs(ffhp); + inode_unlock(dirp); if (!host_err) { err = nfserrno(commit_metadata(ffhp)); if (!err) @@ -1660,17 +1582,17 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, else err = nfserrno(host_err); } -out_dput: dput(dnew); -out_unlock: - fh_unlock(ffhp); +out_drop_write: fh_drop_write(tfhp); out: return err; -out_nfserr: - err = nfserrno(host_err); - goto out_unlock; +out_dput: + dput(dnew); +out_unlock: + inode_unlock(dirp); + goto out_drop_write; } static void @@ -1705,7 +1627,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, struct inode *fdir, *tdir; __be32 err; int host_err; - bool has_cached = false; + bool close_cached = false; err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE); if (err) @@ -1731,12 +1653,9 @@ retry: goto out; } - /* cannot use fh_lock as we need deadlock protective ordering - * so do it by hand */ trap = lock_rename(tdentry, fdentry); - ffhp->fh_locked = tfhp->fh_locked = true; - fill_pre_wcc(ffhp); - fill_pre_wcc(tfhp); + fh_fill_pre_attrs(ffhp); + fh_fill_pre_attrs(tfhp); odentry = lookup_one_len(fname, fdentry, flen); host_err = PTR_ERR(odentry); @@ -1764,11 +1683,28 @@ retry: if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry) goto out_dput_new; - if (nfsd_has_cached_files(ndentry)) { - has_cached = true; + if ((ndentry->d_sb->s_export_op->flags & EXPORT_OP_CLOSE_BEFORE_UNLINK) && + nfsd_has_cached_files(ndentry)) { + close_cached = true; goto out_dput_old; } else { - host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0); + struct renamedata rd = { + .old_mnt_userns = &init_user_ns, + .old_dir = fdir, + .old_dentry = odentry, + .new_mnt_userns = &init_user_ns, + .new_dir = tdir, + .new_dentry = ndentry, + }; + int retries; + + for (retries = 1;;) { + host_err = vfs_rename(&rd); + if (host_err != -EAGAIN || !retries--) + break; + if (!nfsd_wait_for_delegreturn(rqstp, d_inode(odentry))) + break; + } if (!host_err) { host_err = commit_metadata(tfhp); if (!host_err) @@ -1781,17 +1717,12 @@ retry: dput(odentry); out_nfserr: err = nfserrno(host_err); - /* - * We cannot rely on fh_unlock on the two filehandles, - * as that would do the wrong thing if the two directories - * were the same, so again we do it by hand. - */ - if (!has_cached) { - fill_post_wcc(ffhp); - fill_post_wcc(tfhp); + + if (!close_cached) { + fh_fill_post_attrs(ffhp); + fh_fill_post_attrs(tfhp); } unlock_rename(tdentry, fdentry); - ffhp->fh_locked = tfhp->fh_locked = false; fh_drop_write(ffhp); /* @@ -1800,8 +1731,8 @@ retry: * shouldn't be done with locks held however, so we delay it until this * point and then reattempt the whole shebang. */ - if (has_cached) { - has_cached = false; + if (close_cached) { + close_cached = false; nfsd_close_cached_files(ndentry); dput(ndentry); goto retry; @@ -1820,6 +1751,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, { struct dentry *dentry, *rdentry; struct inode *dirp; + struct inode *rinode; __be32 err; int host_err; @@ -1834,34 +1766,50 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, if (host_err) goto out_nfserr; - fh_lock_nested(fhp, I_MUTEX_PARENT); dentry = fhp->fh_dentry; dirp = d_inode(dentry); + inode_lock_nested(dirp, I_MUTEX_PARENT); rdentry = lookup_one_len(fname, dentry, flen); host_err = PTR_ERR(rdentry); if (IS_ERR(rdentry)) - goto out_drop_write; + goto out_unlock; if (d_really_is_negative(rdentry)) { dput(rdentry); host_err = -ENOENT; - goto out_drop_write; + goto out_unlock; } + rinode = d_inode(rdentry); + ihold(rinode); if (!type) type = d_inode(rdentry)->i_mode & S_IFMT; + fh_fill_pre_attrs(fhp); if (type != S_IFDIR) { - nfsd_close_cached_files(rdentry); - host_err = vfs_unlink(dirp, rdentry, NULL); + int retries; + + if (rdentry->d_sb->s_export_op->flags & EXPORT_OP_CLOSE_BEFORE_UNLINK) + nfsd_close_cached_files(rdentry); + + for (retries = 1;;) { + host_err = vfs_unlink(&init_user_ns, dirp, rdentry, NULL); + if (host_err != -EAGAIN || !retries--) + break; + if (!nfsd_wait_for_delegreturn(rqstp, rinode)) + break; + } } else { - host_err = vfs_rmdir(dirp, rdentry); + host_err = vfs_rmdir(&init_user_ns, dirp, rdentry); } + fh_fill_post_attrs(fhp); + inode_unlock(dirp); if (!host_err) host_err = commit_metadata(fhp); dput(rdentry); + iput(rinode); /* truncate the inode here */ out_drop_write: fh_drop_write(fhp); @@ -1879,6 +1827,9 @@ out_nfserr: } out: return err; +out_unlock: + inode_unlock(dirp); + goto out_drop_write; } /* @@ -1903,7 +1854,7 @@ struct readdir_data { int full; }; -static int nfsd_buffered_filldir(struct dir_context *ctx, const char *name, +static bool nfsd_buffered_filldir(struct dir_context *ctx, const char *name, int namlen, loff_t offset, u64 ino, unsigned int d_type) { @@ -1915,7 +1866,7 @@ static int nfsd_buffered_filldir(struct dir_context *ctx, const char *name, reclen = ALIGN(sizeof(struct buffered_dirent) + namlen, sizeof(u64)); if (buf->used + reclen > PAGE_SIZE) { buf->full = 1; - return -EINVAL; + return false; } de->namlen = namlen; @@ -1925,11 +1876,12 @@ static int nfsd_buffered_filldir(struct dir_context *ctx, const char *name, memcpy(de->name, name, namlen); buf->used += reclen; - return 0; + return true; } -static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func, - struct readdir_cd *cdp, loff_t *offsetp) +static __be32 nfsd_buffered_readdir(struct file *file, struct svc_fh *fhp, + nfsd_filldir_t func, struct readdir_cd *cdp, + loff_t *offsetp) { struct buffered_dirent *de; int host_err; @@ -1975,6 +1927,8 @@ static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func, if (cdp->err != nfs_ok) break; + trace_nfsd_dirent(fhp, de->ino, de->name, de->namlen); + reclen = ALIGN(sizeof(*de) + de->namlen, sizeof(u64)); size -= reclen; @@ -2022,7 +1976,7 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, goto out_close; } - err = nfsd_buffered_readdir(file, func, cdp, offsetp); + err = nfsd_buffered_readdir(file, fhp, func, cdp, offsetp); if (err == nfserr_eof || err == nfserr_toosmall) err = nfs_ok; /* can still be found in ->err */ @@ -2058,6 +2012,242 @@ static int exp_rdonly(struct svc_rqst *rqstp, struct svc_export *exp) return nfsexp_flags(rqstp, exp) & NFSEXP_READONLY; } +#ifdef CONFIG_NFSD_V4 +/* + * Helper function to translate error numbers. In the case of xattr operations, + * some error codes need to be translated outside of the standard translations. + * + * ENODATA needs to be translated to nfserr_noxattr. + * E2BIG to nfserr_xattr2big. + * + * Additionally, vfs_listxattr can return -ERANGE. This means that the + * file has too many extended attributes to retrieve inside an + * XATTR_LIST_MAX sized buffer. This is a bug in the xattr implementation: + * filesystems will allow the adding of extended attributes until they hit + * their own internal limit. This limit may be larger than XATTR_LIST_MAX. + * So, at that point, the attributes are present and valid, but can't + * be retrieved using listxattr, since the upper level xattr code enforces + * the XATTR_LIST_MAX limit. + * + * This bug means that we need to deal with listxattr returning -ERANGE. The + * best mapping is to return TOOSMALL. + */ +static __be32 +nfsd_xattr_errno(int err) +{ + switch (err) { + case -ENODATA: + return nfserr_noxattr; + case -E2BIG: + return nfserr_xattr2big; + case -ERANGE: + return nfserr_toosmall; + } + return nfserrno(err); +} + +/* + * Retrieve the specified user extended attribute. To avoid always + * having to allocate the maximum size (since we are not getting + * a maximum size from the RPC), do a probe + alloc. Hold a reader + * lock on i_rwsem to prevent the extended attribute from changing + * size while we're doing this. + */ +__be32 +nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name, + void **bufp, int *lenp) +{ + ssize_t len; + __be32 err; + char *buf; + struct inode *inode; + struct dentry *dentry; + + err = fh_verify(rqstp, fhp, 0, NFSD_MAY_READ); + if (err) + return err; + + err = nfs_ok; + dentry = fhp->fh_dentry; + inode = d_inode(dentry); + + inode_lock_shared(inode); + + len = vfs_getxattr(&init_user_ns, dentry, name, NULL, 0); + + /* + * Zero-length attribute, just return. + */ + if (len == 0) { + *bufp = NULL; + *lenp = 0; + goto out; + } + + if (len < 0) { + err = nfsd_xattr_errno(len); + goto out; + } + + if (len > *lenp) { + err = nfserr_toosmall; + goto out; + } + + buf = kvmalloc(len, GFP_KERNEL | GFP_NOFS); + if (buf == NULL) { + err = nfserr_jukebox; + goto out; + } + + len = vfs_getxattr(&init_user_ns, dentry, name, buf, len); + if (len <= 0) { + kvfree(buf); + buf = NULL; + err = nfsd_xattr_errno(len); + } + + *lenp = len; + *bufp = buf; + +out: + inode_unlock_shared(inode); + + return err; +} + +/* + * Retrieve the xattr names. Since we can't know how many are + * user extended attributes, we must get all attributes here, + * and have the XDR encode filter out the "user." ones. + * + * While this could always just allocate an XATTR_LIST_MAX + * buffer, that's a waste, so do a probe + allocate. To + * avoid any changes between the probe and allocate, wrap + * this in inode_lock. + */ +__be32 +nfsd_listxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char **bufp, + int *lenp) +{ + ssize_t len; + __be32 err; + char *buf; + struct inode *inode; + struct dentry *dentry; + + err = fh_verify(rqstp, fhp, 0, NFSD_MAY_READ); + if (err) + return err; + + dentry = fhp->fh_dentry; + inode = d_inode(dentry); + *lenp = 0; + + inode_lock_shared(inode); + + len = vfs_listxattr(dentry, NULL, 0); + if (len <= 0) { + err = nfsd_xattr_errno(len); + goto out; + } + + if (len > XATTR_LIST_MAX) { + err = nfserr_xattr2big; + goto out; + } + + /* + * We're holding i_rwsem - use GFP_NOFS. + */ + buf = kvmalloc(len, GFP_KERNEL | GFP_NOFS); + if (buf == NULL) { + err = nfserr_jukebox; + goto out; + } + + len = vfs_listxattr(dentry, buf, len); + if (len <= 0) { + kvfree(buf); + err = nfsd_xattr_errno(len); + goto out; + } + + *lenp = len; + *bufp = buf; + + err = nfs_ok; +out: + inode_unlock_shared(inode); + + return err; +} + +/** + * nfsd_removexattr - Remove an extended attribute + * @rqstp: RPC transaction being executed + * @fhp: NFS filehandle of object with xattr to remove + * @name: name of xattr to remove (NUL-terminate) + * + * Pass in a NULL pointer for delegated_inode, and let the client deal + * with NFS4ERR_DELAY (same as with e.g. setattr and remove). + * + * Returns nfs_ok on success, or an nfsstat in network byte order. + */ +__be32 +nfsd_removexattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name) +{ + __be32 err; + int ret; + + err = fh_verify(rqstp, fhp, 0, NFSD_MAY_WRITE); + if (err) + return err; + + ret = fh_want_write(fhp); + if (ret) + return nfserrno(ret); + + inode_lock(fhp->fh_dentry->d_inode); + fh_fill_pre_attrs(fhp); + + ret = __vfs_removexattr_locked(&init_user_ns, fhp->fh_dentry, + name, NULL); + + fh_fill_post_attrs(fhp); + inode_unlock(fhp->fh_dentry->d_inode); + fh_drop_write(fhp); + + return nfsd_xattr_errno(ret); +} + +__be32 +nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name, + void *buf, u32 len, u32 flags) +{ + __be32 err; + int ret; + + err = fh_verify(rqstp, fhp, 0, NFSD_MAY_WRITE); + if (err) + return err; + + ret = fh_want_write(fhp); + if (ret) + return nfserrno(ret); + inode_lock(fhp->fh_dentry->d_inode); + fh_fill_pre_attrs(fhp); + + ret = __vfs_setxattr_locked(&init_user_ns, fhp->fh_dentry, name, buf, + len, flags, NULL); + fh_fill_post_attrs(fhp); + inode_unlock(fhp->fh_dentry->d_inode); + fh_drop_write(fhp); + + return nfsd_xattr_errno(ret); +} +#endif + /* * Check for a user's access permissions to this inode. */ @@ -2132,13 +2322,14 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, return 0; /* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */ - err = inode_permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC)); + err = inode_permission(&init_user_ns, inode, + acc & (MAY_READ | MAY_WRITE | MAY_EXEC)); /* Allow read access to binaries even when mode 111 */ if (err == -EACCES && S_ISREG(inode->i_mode) && (acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE) || acc == (NFSD_MAY_READ | NFSD_MAY_READ_IF_EXEC))) - err = inode_permission(inode, MAY_EXEC); + err = inode_permission(&init_user_ns, inode, MAY_EXEC); return err? nfserrno(err) : 0; } diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 3eb660ad80d1..120521bc7b24 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -6,6 +6,8 @@ #ifndef LINUX_NFSD_VFS_H #define LINUX_NFSD_VFS_H +#include <linux/fs.h> +#include <linux/posix_acl.h> #include "nfsfh.h" #include "nfsd.h" @@ -42,6 +44,22 @@ struct nfsd_file; typedef int (*nfsd_filldir_t)(void *, const char *, int, loff_t, u64, unsigned); /* nfsd/vfs.c */ +struct nfsd_attrs { + struct iattr *na_iattr; /* input */ + struct xdr_netobj *na_seclabel; /* input */ + struct posix_acl *na_pacl; /* input */ + struct posix_acl *na_dpacl; /* input */ + + int na_labelerr; /* output */ + int na_aclerr; /* output */ +}; + +static inline void nfsd_attrs_free(struct nfsd_attrs *attrs) +{ + posix_acl_release(attrs->na_pacl); + posix_acl_release(attrs->na_dpacl); +} + int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, struct svc_export **expp); __be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *, @@ -50,36 +68,41 @@ __be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *, const char *, unsigned int, struct svc_export **, struct dentry **); __be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *, - struct iattr *, int, time64_t); + struct nfsd_attrs *, int, time64_t); int nfsd_mountpoint(struct dentry *, struct svc_export *); #ifdef CONFIG_NFSD_V4 -__be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *, - struct xdr_netobj *); __be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *, struct file *, loff_t, loff_t, int); -__be32 nfsd4_clone_file_range(struct nfsd_file *nf_src, u64 src_pos, +__be32 nfsd4_clone_file_range(struct svc_rqst *rqstp, + struct nfsd_file *nf_src, u64 src_pos, struct nfsd_file *nf_dst, u64 dst_pos, u64 count, bool sync); #endif /* CONFIG_NFSD_V4 */ __be32 nfsd_create_locked(struct svc_rqst *, struct svc_fh *, - char *name, int len, struct iattr *attrs, - int type, dev_t rdev, struct svc_fh *res); + struct nfsd_attrs *attrs, int type, dev_t rdev, + struct svc_fh *res); __be32 nfsd_create(struct svc_rqst *, struct svc_fh *, - char *name, int len, struct iattr *attrs, + char *name, int len, struct nfsd_attrs *attrs, int type, dev_t rdev, struct svc_fh *res); -#ifdef CONFIG_NFSD_V3 __be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *); -__be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *, - char *name, int len, struct iattr *attrs, - struct svc_fh *res, int createmode, - u32 *verifier, bool *truncp, bool *created); -__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, - loff_t, unsigned long, __be32 *verf); -#endif /* CONFIG_NFSD_V3 */ +__be32 nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct svc_fh *resfhp, struct nfsd_attrs *iap); +__be32 nfsd_commit(struct svc_rqst *rqst, struct svc_fh *fhp, + u64 offset, u32 count, __be32 *verf); +#ifdef CONFIG_NFSD_V4 +__be32 nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, + char *name, void **bufp, int *lenp); +__be32 nfsd_listxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, + char **bufp, int *lenp); +__be32 nfsd_removexattr(struct svc_rqst *rqstp, struct svc_fh *fhp, + char *name); +__be32 nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, + char *name, void *buf, u32 len, u32 flags); +#endif int nfsd_open_break_lease(struct inode *, int); __be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t, int, struct file **); -__be32 nfsd_open_verified(struct svc_rqst *, struct svc_fh *, umode_t, +__be32 nfsd_open_verified(struct svc_rqst *, struct svc_fh *, int, struct file **); __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, @@ -103,8 +126,9 @@ __be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *, char *, int *); __be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *, - char *name, int len, char *path, - struct svc_fh *res); + char *name, int len, char *path, + struct nfsd_attrs *attrs, + struct svc_fh *res); __be32 nfsd_link(struct svc_rqst *, struct svc_fh *, char *, int, struct svc_fh *); ssize_t nfsd_copy_file_range(struct file *, u64, @@ -142,7 +166,7 @@ static inline void fh_drop_write(struct svc_fh *fh) } } -static inline __be32 fh_getattr(struct svc_fh *fh, struct kstat *stat) +static inline __be32 fh_getattr(const struct svc_fh *fh, struct kstat *stat) { struct path p = {.mnt = fh->fh_export->ex_path.mnt, .dentry = fh->fh_dentry}; @@ -150,10 +174,4 @@ static inline __be32 fh_getattr(struct svc_fh *fh, struct kstat *stat) AT_STATX_SYNC_AS_STAT)); } -static inline int nfsd_create_is_exclusive(int createmode) -{ - return createmode == NFS3_CREATE_EXCLUSIVE - || createmode == NFS4_CREATE_EXCLUSIVE4_1; -} - #endif /* LINUX_NFSD_VFS_H */ diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h index ea7cca3a64b7..852f71580bd0 100644 --- a/fs/nfsd/xdr.h +++ b/fs/nfsd/xdr.h @@ -27,14 +27,13 @@ struct nfsd_readargs { struct svc_fh fh; __u32 offset; __u32 count; - int vlen; }; struct nfsd_writeargs { svc_fh fh; __u32 offset; - int len; - struct kvec first; + __u32 len; + struct xdr_buf payload; }; struct nfsd_createargs { @@ -53,11 +52,6 @@ struct nfsd_renameargs { unsigned int tlen; }; -struct nfsd_readlinkargs { - struct svc_fh fh; - char * buffer; -}; - struct nfsd_linkargs { struct svc_fh ffh; struct svc_fh tfh; @@ -79,39 +73,53 @@ struct nfsd_readdirargs { struct svc_fh fh; __u32 cookie; __u32 count; - __be32 * buffer; +}; + +struct nfsd_stat { + __be32 status; }; struct nfsd_attrstat { + __be32 status; struct svc_fh fh; struct kstat stat; }; struct nfsd_diropres { + __be32 status; struct svc_fh fh; struct kstat stat; }; struct nfsd_readlinkres { + __be32 status; int len; + struct page *page; }; struct nfsd_readres { + __be32 status; struct svc_fh fh; unsigned long count; struct kstat stat; + struct page **pages; }; struct nfsd_readdirres { + /* Components of the reply */ + __be32 status; + int count; + /* Used to encode the reply's entry list */ + struct xdr_stream xdr; + struct xdr_buf dirlist; struct readdir_cd common; - __be32 * buffer; - int buflen; - __be32 * offset; + unsigned int cookie_offset; }; struct nfsd_statfsres { + __be32 status; struct kstatfs stats; }; @@ -133,33 +141,37 @@ union nfsd_xdrstore { #define NFS2_SVC_XDRSIZE sizeof(union nfsd_xdrstore) -int nfssvc_decode_void(struct svc_rqst *, __be32 *); -int nfssvc_decode_fhandle(struct svc_rqst *, __be32 *); -int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_readargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_writeargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_createargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_renameargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_readlinkargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *); -int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *); -int nfssvc_encode_void(struct svc_rqst *, __be32 *); -int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *); -int nfssvc_encode_diropres(struct svc_rqst *, __be32 *); -int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *); -int nfssvc_encode_readres(struct svc_rqst *, __be32 *); -int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *); -int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *); - -int nfssvc_encode_entry(void *, const char *name, - int namlen, loff_t offset, u64 ino, unsigned int); - -void nfssvc_release_fhandle(struct svc_rqst *); +bool nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); + +bool nfssvc_encode_statres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_encode_attrstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_encode_diropres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfssvc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr); + +void nfssvc_encode_nfscookie(struct nfsd_readdirres *resp, u32 offset); +int nfssvc_encode_entry(void *data, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type); + +void nfssvc_release_attrstat(struct svc_rqst *rqstp); +void nfssvc_release_diropres(struct svc_rqst *rqstp); +void nfssvc_release_readres(struct svc_rqst *rqstp); /* Helper functions for NFSv2 ACL code */ -__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat); -__be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp); +bool svcxdr_decode_fhandle(struct xdr_stream *xdr, struct svc_fh *fhp); +bool svcxdr_encode_stat(struct xdr_stream *xdr, __be32 status); +bool svcxdr_encode_fattr(struct svc_rqst *rqstp, struct xdr_stream *xdr, + const struct svc_fh *fhp, const struct kstat *stat); #endif /* LINUX_NFSD_H */ diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index 4155fd71714c..03fe4e21306c 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -25,14 +25,13 @@ struct nfsd3_diropargs { struct nfsd3_accessargs { struct svc_fh fh; - unsigned int access; + __u32 access; }; struct nfsd3_readargs { struct svc_fh fh; __u64 offset; __u32 count; - int vlen; }; struct nfsd3_writeargs { @@ -41,7 +40,7 @@ struct nfsd3_writeargs { __u32 count; int stable; __u32 len; - struct kvec first; + struct xdr_buf payload; }; struct nfsd3_createargs { @@ -71,11 +70,6 @@ struct nfsd3_renameargs { unsigned int tlen; }; -struct nfsd3_readlinkargs { - struct svc_fh fh; - char * buffer; -}; - struct nfsd3_linkargs { struct svc_fh ffh; struct svc_fh tfh; @@ -96,10 +90,8 @@ struct nfsd3_symlinkargs { struct nfsd3_readdirargs { struct svc_fh fh; __u64 cookie; - __u32 dircount; __u32 count; __be32 * verf; - __be32 * buffer; }; struct nfsd3_commitargs { @@ -110,13 +102,13 @@ struct nfsd3_commitargs { struct nfsd3_getaclargs { struct svc_fh fh; - int mask; + __u32 mask; }; struct posix_acl; struct nfsd3_setaclargs { struct svc_fh fh; - int mask; + __u32 mask; struct posix_acl *acl_access; struct posix_acl *acl_default; }; @@ -145,6 +137,7 @@ struct nfsd3_readlinkres { __be32 status; struct svc_fh fh; __u32 len; + struct page **pages; }; struct nfsd3_readres { @@ -152,6 +145,7 @@ struct nfsd3_readres { struct svc_fh fh; unsigned long count; __u32 eof; + struct page **pages; }; struct nfsd3_writeres { @@ -175,19 +169,17 @@ struct nfsd3_linkres { }; struct nfsd3_readdirres { + /* Components of the reply */ __be32 status; struct svc_fh fh; - /* Just to save kmalloc on every readdirplus entry (svc_fh is a - * little large for the stack): */ - struct svc_fh scratch; - int count; __be32 verf[2]; + /* Used to encode the reply's entry list */ + struct xdr_stream xdr; + struct xdr_buf dirlist; + struct svc_fh scratch; struct readdir_cd common; - __be32 * buffer; - int buflen; - __be32 * offset; - __be32 * offset1; + unsigned int cookie_offset; struct svc_rqst * rqstp; }; @@ -273,51 +265,50 @@ union nfsd3_xdrstore { #define NFS3_SVC_XDRSIZE sizeof(union nfsd3_xdrstore) -int nfs3svc_decode_fhandle(struct svc_rqst *, __be32 *); -int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_accessargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_readargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_writeargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_createargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_mkdirargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_mknodargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_renameargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_readlinkargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_linkargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *); -int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *); -int nfs3svc_encode_voidres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *); -int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *); -int nfs3svc_encode_diropres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_accessres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_readlinkres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_readres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_writeres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_createres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_renameres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_linkres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_readdirres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_fsstatres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_fsinfores(struct svc_rqst *, __be32 *); -int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *); -int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *); +bool nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); + +bool nfs3svc_encode_getattrres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_wccstat(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_lookupres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_writeres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_createres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_renameres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_linkres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs3svc_encode_commitres(struct svc_rqst *rqstp, struct xdr_stream *xdr); void nfs3svc_release_fhandle(struct svc_rqst *); void nfs3svc_release_fhandle2(struct svc_rqst *); -int nfs3svc_encode_entry(void *, const char *name, - int namlen, loff_t offset, u64 ino, - unsigned int); -int nfs3svc_encode_entry_plus(void *, const char *name, - int namlen, loff_t offset, u64 ino, - unsigned int); -/* Helper functions for NFSv3 ACL code */ -__be32 *nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, - struct svc_fh *fhp); -__be32 *nfs3svc_decode_fh(__be32 *p, struct svc_fh *fhp); +void nfs3svc_encode_cookie3(struct nfsd3_readdirres *resp, u64 offset); +int nfs3svc_encode_entry3(void *data, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type); +int nfs3svc_encode_entryplus3(void *data, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type); +/* Helper functions for NFSv3 ACL code */ +bool svcxdr_decode_nfs_fh3(struct xdr_stream *xdr, struct svc_fh *fhp); +bool svcxdr_encode_nfsstat3(struct xdr_stream *xdr, __be32 status); +bool svcxdr_encode_post_op_attr(struct svc_rqst *rqstp, struct xdr_stream *xdr, + const struct svc_fh *fhp); #endif /* _LINUX_NFSD_XDR3_H */ diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index db63d39b1507..0eb00105d845 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -76,12 +76,7 @@ static inline bool nfsd4_has_session(struct nfsd4_compound_state *cs) struct nfsd4_change_info { u32 atomic; - bool change_supported; - u32 before_ctime_sec; - u32 before_ctime_nsec; u64 before_change; - u32 after_ctime_sec; - u32 after_ctime_nsec; u64 after_change; }; @@ -224,9 +219,36 @@ struct nfsd4_putfh { bool no_verify; /* represents foreigh fh */ }; +struct nfsd4_getxattr { + char *getxa_name; /* request */ + u32 getxa_len; /* request */ + void *getxa_buf; +}; + +struct nfsd4_setxattr { + u32 setxa_flags; /* request */ + char *setxa_name; /* request */ + char *setxa_buf; /* request */ + u32 setxa_len; /* request */ + struct nfsd4_change_info setxa_cinfo; /* response */ +}; + +struct nfsd4_removexattr { + char *rmxa_name; /* request */ + struct nfsd4_change_info rmxa_cinfo; /* response */ +}; + +struct nfsd4_listxattrs { + u64 lsxa_cookie; /* request */ + u32 lsxa_maxcount; /* request */ + char *lsxa_buf; /* unfiltered buffer (reply) */ + u32 lsxa_len; /* unfiltered len (reply) */ +}; + struct nfsd4_open { u32 op_claim_type; /* request */ - struct xdr_netobj op_fname; /* request - everything but CLAIM_PREV */ + u32 op_fnamelen; + char * op_fname; /* request - everything but CLAIM_PREV */ u32 op_delegate_type; /* request - CLAIM_PREV only */ stateid_t op_delegate_stateid; /* request - response */ u32 op_why_no_deleg; /* response - DELEG_NONE_EXT only */ @@ -251,11 +273,13 @@ struct nfsd4_open { bool op_truncate; /* used during processing */ bool op_created; /* used during processing */ struct nfs4_openowner *op_openowner; /* used during processing */ + struct file *op_filp; /* used during processing */ struct nfs4_file *op_file; /* used during processing */ struct nfs4_ol_stateid *op_stp; /* used during processing */ struct nfs4_clnt_odstate *op_odstate; /* used during processing */ struct nfs4_acl *op_acl; struct xdr_netobj op_label; + struct svc_rqst *op_rqstp; }; struct nfsd4_open_confirm { @@ -279,9 +303,10 @@ struct nfsd4_read { u32 rd_length; /* request */ int rd_vlen; struct nfsd_file *rd_nf; - + struct svc_rqst *rd_rqstp; /* response */ - struct svc_fh *rd_fhp; /* response */ + struct svc_fh *rd_fhp; /* response */ + u32 rd_eof; /* response */ }; struct nfsd4_readdir { @@ -359,13 +384,6 @@ struct nfsd4_setclientid_confirm { nfs4_verifier sc_confirm; }; -struct nfsd4_saved_compoundargs { - __be32 *p; - __be32 *end; - int pagelen; - struct page **pagelist; -}; - struct nfsd4_test_stateid_id { __be32 ts_id_status; stateid_t ts_id_stateid; @@ -393,8 +411,7 @@ struct nfsd4_write { u64 wr_offset; /* request */ u32 wr_stable_how; /* request */ u32 wr_buflen; /* request */ - struct kvec wr_head; - struct page ** wr_pagelist; /* request */ + struct xdr_buf wr_payload; /* request */ u32 wr_bytes_written; /* response */ u32 wr_how_written; /* response */ @@ -407,7 +424,7 @@ struct nfsd4_exchange_id { u32 flags; clientid_t clientid; u32 seqid; - int spa_how; + u32 spa_how; u32 spo_must_enforce[3]; u32 spo_must_allow[3]; struct xdr_netobj nii_domain; @@ -517,6 +534,13 @@ struct nfsd42_write_res { stateid_t cb_stateid; }; +struct nfsd4_cb_offload { + struct nfsd4_callback co_cb; + struct nfsd42_write_res co_res; + __be32 co_nfserr; + struct knfsd_fh co_fh; +}; + struct nfsd4_copy { /* request */ stateid_t cp_src_stateid; @@ -524,18 +548,16 @@ struct nfsd4_copy { u64 cp_src_pos; u64 cp_dst_pos; u64 cp_count; - struct nl4_server cp_src; - bool cp_intra; + struct nl4_server *cp_src; - /* both */ - bool cp_synchronous; + unsigned long cp_flags; +#define NFSD4_COPY_F_STOPPED (0) +#define NFSD4_COPY_F_INTRA (1) +#define NFSD4_COPY_F_SYNCHRONOUS (2) +#define NFSD4_COPY_F_COMMITTED (3) /* response */ struct nfsd42_write_res cp_res; - - /* for cb_offload */ - struct nfsd4_callback cp_cb; - __be32 nfserr; struct knfsd_fh fh; struct nfs4_client *cp_clp; @@ -548,13 +570,34 @@ struct nfsd4_copy { struct list_head copies; struct task_struct *copy_task; refcount_t refcount; - bool stopped; struct vfsmount *ss_mnt; struct nfs_fh c_fh; nfs4_stateid stateid; }; -extern bool inter_copy_offload_enable; + +static inline void nfsd4_copy_set_sync(struct nfsd4_copy *copy, bool sync) +{ + if (sync) + set_bit(NFSD4_COPY_F_SYNCHRONOUS, ©->cp_flags); + else + clear_bit(NFSD4_COPY_F_SYNCHRONOUS, ©->cp_flags); +} + +static inline bool nfsd4_copy_is_sync(const struct nfsd4_copy *copy) +{ + return test_bit(NFSD4_COPY_F_SYNCHRONOUS, ©->cp_flags); +} + +static inline bool nfsd4_copy_is_async(const struct nfsd4_copy *copy) +{ + return !test_bit(NFSD4_COPY_F_SYNCHRONOUS, ©->cp_flags); +} + +static inline bool nfsd4_ssc_is_inter(const struct nfsd4_copy *copy) +{ + return !test_bit(NFSD4_COPY_F_INTRA, ©->cp_flags); +} struct nfsd4_seek { /* request */ @@ -579,19 +622,20 @@ struct nfsd4_offload_status { struct nfsd4_copy_notify { /* request */ stateid_t cpn_src_stateid; - struct nl4_server cpn_dst; + struct nl4_server *cpn_dst; /* response */ stateid_t cpn_cnr_stateid; u64 cpn_sec; u32 cpn_nsec; - struct nl4_server cpn_src; + struct nl4_server *cpn_src; }; struct nfsd4_op { - int opnum; - const struct nfsd4_operation * opdesc; + u32 opnum; __be32 status; + const struct nfsd4_operation *opdesc; + struct nfs4_replay *replay; union nfsd4_op_u { struct nfsd4_access access; struct nfsd4_close close; @@ -649,8 +693,12 @@ struct nfsd4_op { struct nfsd4_offload_status offload_status; struct nfsd4_copy_notify copy_notify; struct nfsd4_seek seek; + + struct nfsd4_getxattr getxattr; + struct nfsd4_setxattr setxattr; + struct nfsd4_listxattrs listxattrs; + struct nfsd4_removexattr removexattr; } u; - struct nfs4_replay * replay; }; bool nfsd4_cache_this_op(struct nfsd4_op *); @@ -665,35 +713,29 @@ struct svcxdr_tmpbuf { struct nfsd4_compoundargs { /* scratch variables for XDR decode */ - __be32 * p; - __be32 * end; - struct page ** pagelist; - int pagelen; - bool tail; - __be32 tmp[8]; - __be32 * tmpp; + struct xdr_stream *xdr; struct svcxdr_tmpbuf *to_free; - struct svc_rqst *rqstp; - u32 taglen; char * tag; + u32 taglen; u32 minorversion; + u32 client_opcnt; u32 opcnt; struct nfsd4_op *ops; struct nfsd4_op iops[8]; - int cachetype; }; struct nfsd4_compoundres { /* scratch variables for XDR encode */ - struct xdr_stream xdr; + struct xdr_stream *xdr; struct svc_rqst * rqstp; - u32 taglen; + __be32 *statusp; char * tag; + u32 taglen; u32 opcnt; - __be32 * tagp; /* tag, opcount encode location */ + struct nfsd4_compound_state cstate; }; @@ -736,23 +778,16 @@ static inline void set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp) { BUG_ON(!fhp->fh_pre_saved); - cinfo->atomic = (u32)fhp->fh_post_saved; - cinfo->change_supported = IS_I_VERSION(d_inode(fhp->fh_dentry)); + cinfo->atomic = (u32)(fhp->fh_post_saved && !fhp->fh_no_atomic_attr); cinfo->before_change = fhp->fh_pre_change; cinfo->after_change = fhp->fh_post_change; - cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec; - cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec; - cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec; - cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec; - } bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp); -int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *); -int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *); -int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *); +bool nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr); __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32); void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *); void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op); @@ -853,7 +888,8 @@ struct nfsd4_operation { u32 op_flags; char *op_name; /* Try to get response size before operation */ - u32 (*op_rsize_bop)(struct svc_rqst *, struct nfsd4_op *); + u32 (*op_rsize_bop)(const struct svc_rqst *rqstp, + const struct nfsd4_op *op); void (*op_get_currentstateid)(struct nfsd4_compound_state *, union nfsd4_op_u *); void (*op_set_currentstateid)(struct nfsd4_compound_state *, @@ -862,9 +898,3 @@ struct nfsd4_operation { #endif - -/* - * Local variables: - * c-basic-offset: 8 - * End: - */ |