From 1bf85d8c98756421e29b9990469ee63bb0bc87cc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 27 Jun 2019 06:30:48 -0400 Subject: NFSv4: Handle open for execute correctly When mapping the NFSv4 context to an open mode and access mode, we need to treat the FMODE_EXEC flag differently. For the open mode, FMODE_EXEC means we need read share access. For the access mode checking, we need to verify that the user actually has execute access. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6418cb6c079b..26626ea1f197 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1165,6 +1165,18 @@ static bool nfs4_clear_cap_atomic_open_v1(struct nfs_server *server, return true; } +static fmode_t _nfs4_ctx_to_accessmode(const struct nfs_open_context *ctx) +{ + return ctx->mode & (FMODE_READ|FMODE_WRITE|FMODE_EXEC); +} + +static fmode_t _nfs4_ctx_to_openmode(const struct nfs_open_context *ctx) +{ + fmode_t ret = ctx->mode & (FMODE_READ|FMODE_WRITE); + + return (ctx->mode & FMODE_EXEC) ? FMODE_READ | ret : ret; +} + static u32 nfs4_map_atomic_open_share(struct nfs_server *server, fmode_t fmode, int openflags) @@ -2900,14 +2912,13 @@ static unsigned nfs4_exclusive_attrset(struct nfs4_opendata *opendata, } static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, - fmode_t fmode, - int flags, - struct nfs_open_context *ctx) + int flags, struct nfs_open_context *ctx) { struct nfs4_state_owner *sp = opendata->owner; struct nfs_server *server = sp->so_server; struct dentry *dentry; struct nfs4_state *state; + fmode_t acc_mode = _nfs4_ctx_to_accessmode(ctx); unsigned int seq; int ret; @@ -2946,7 +2957,8 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, /* Parse layoutget results before we check for access */ pnfs_parse_lgopen(state->inode, opendata->lgp, ctx); - ret = nfs4_opendata_access(sp->so_cred, opendata, state, fmode, flags); + ret = nfs4_opendata_access(sp->so_cred, opendata, state, + acc_mode, flags); if (ret != 0) goto out; @@ -2978,7 +2990,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry = ctx->dentry; const struct cred *cred = ctx->cred; struct nfs4_threshold **ctx_th = &ctx->mdsthreshold; - fmode_t fmode = ctx->mode & (FMODE_READ|FMODE_WRITE|FMODE_EXEC); + fmode_t fmode = _nfs4_ctx_to_openmode(ctx); enum open_claim_type4 claim = NFS4_OPEN_CLAIM_NULL; struct iattr *sattr = c->sattr; struct nfs4_label *label = c->label; @@ -3024,7 +3036,7 @@ static int _nfs4_do_open(struct inode *dir, if (d_really_is_positive(dentry)) opendata->state = nfs4_get_open_state(d_inode(dentry), sp); - status = _nfs4_open_and_get_state(opendata, fmode, flags, ctx); + status = _nfs4_open_and_get_state(opendata, flags, ctx); if (status != 0) goto err_free_label; state = ctx->state; @@ -3594,9 +3606,9 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) if (ctx->state == NULL) return; if (is_sync) - nfs4_close_sync(ctx->state, ctx->mode); + nfs4_close_sync(ctx->state, _nfs4_ctx_to_openmode(ctx)); else - nfs4_close_state(ctx->state, ctx->mode); + nfs4_close_state(ctx->state, _nfs4_ctx_to_openmode(ctx)); } #define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL) -- cgit v1.2.3-59-g8ed1b From 44942b4e457beda00981f616402a1a791e8c616e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 27 Jun 2019 06:41:45 -0400 Subject: NFSv4: Handle the special Linux file open access mode According to the open() manpage, Linux reserves the access mode 3 to mean "check for read and write permission on the file and return a file descriptor that can't be used for reading or writing." Currently, the NFSv4 code will ask the server to open the file, and will use an incorrect share access mode of 0. Since it has an incorrect share access mode, the client later forgets to send a corresponding close, meaning it can leak stateids on the server. Fixes: ce4ef7c0a8a05 ("NFS: Split out NFS v4 file operations") Cc: stable@vger.kernel.org # 3.6+ Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 1 + fs/nfs/nfs4file.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 0b4a1a974411..53777813ca95 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1100,6 +1100,7 @@ int nfs_open(struct inode *inode, struct file *filp) nfs_fscache_open_file(inode, filp); return 0; } +EXPORT_SYMBOL_GPL(nfs_open); /* * This function is called whenever some part of NFS notices that diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index cf42a8b939e3..3a507c42c1ca 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -49,7 +49,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) return err; if ((openflags & O_ACCMODE) == 3) - openflags--; + return nfs_open(inode, filp); /* We can't create new files here */ openflags &= ~(O_CREAT|O_EXCL); -- cgit v1.2.3-59-g8ed1b From 996bc4f405d37ffd88c9b830202ee47fc7a6c449 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 24 Jan 2019 16:10:46 -0500 Subject: NFS: Create a root NFS directory in /sys/fs/nfs Signed-off-by: Trond Myklebust --- fs/nfs/Makefile | 3 ++- fs/nfs/inode.c | 8 +++++++ fs/nfs/sysfs.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/sysfs.h | 15 +++++++++++++ 4 files changed, 94 insertions(+), 1 deletion(-) create mode 100644 fs/nfs/sysfs.c create mode 100644 fs/nfs/sysfs.h (limited to 'fs') diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index c587e3c4c6a6..34cdeaecccf6 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -8,7 +8,8 @@ obj-$(CONFIG_NFS_FS) += nfs.o CFLAGS_nfstrace.o += -I$(src) nfs-y := client.o dir.o file.o getroot.o inode.o super.o \ io.o direct.o pagelist.o read.o symlink.o unlink.o \ - write.o namespace.o mount_clnt.o nfstrace.o export.o + write.o namespace.o mount_clnt.o nfstrace.o \ + export.o sysfs.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o nfs-$(CONFIG_SYSCTL) += sysctl.o nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 53777813ca95..8dba56491de2 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -51,6 +51,7 @@ #include "pnfs.h" #include "nfs.h" #include "netns.h" +#include "sysfs.h" #include "nfstrace.h" @@ -2182,6 +2183,10 @@ static int __init init_nfs_fs(void) { int err; + err = nfs_sysfs_init(); + if (err < 0) + goto out10; + err = register_pernet_subsys(&nfs_net_ops); if (err < 0) goto out9; @@ -2245,6 +2250,8 @@ out7: out8: unregister_pernet_subsys(&nfs_net_ops); out9: + nfs_sysfs_exit(); +out10: return err; } @@ -2261,6 +2268,7 @@ static void __exit exit_nfs_fs(void) unregister_nfs_fs(); nfs_fs_proc_exit(); nfsiod_stop(); + nfs_sysfs_exit(); } /* Not quite true; I just maintain it */ diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c new file mode 100644 index 000000000000..7070711ff6c5 --- /dev/null +++ b/fs/nfs/sysfs.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2019 Hammerspace Inc + */ + +#include +#include +#include +#include +#include +#include + +#include "sysfs.h" + +struct kobject *nfs_client_kobj; +static struct kset *nfs_client_kset; + +static void nfs_netns_object_release(struct kobject *kobj) +{ + kfree(kobj); +} + +static const struct kobj_ns_type_operations *nfs_netns_object_child_ns_type( + struct kobject *kobj) +{ + return &net_ns_type_operations; +} + +static struct kobj_type nfs_netns_object_type = { + .release = nfs_netns_object_release, + .sysfs_ops = &kobj_sysfs_ops, + .child_ns_type = nfs_netns_object_child_ns_type, +}; + +static struct kobject *nfs_netns_object_alloc(const char *name, + struct kset *kset, struct kobject *parent) +{ + struct kobject *kobj; + + kobj = kzalloc(sizeof(*kobj), GFP_KERNEL); + if (kobj) { + kobj->kset = kset; + if (kobject_init_and_add(kobj, &nfs_netns_object_type, + parent, "%s", name) == 0) + return kobj; + kobject_put(kobj); + } + return NULL; +} + +int nfs_sysfs_init(void) +{ + nfs_client_kset = kset_create_and_add("nfs", NULL, fs_kobj); + if (!nfs_client_kset) + return -ENOMEM; + nfs_client_kobj = nfs_netns_object_alloc("net", nfs_client_kset, NULL); + if (!nfs_client_kobj) { + kset_unregister(nfs_client_kset); + nfs_client_kset = NULL; + return -ENOMEM; + } + return 0; +} + +void nfs_sysfs_exit(void) +{ + kobject_put(nfs_client_kobj); + kset_unregister(nfs_client_kset); +} diff --git a/fs/nfs/sysfs.h b/fs/nfs/sysfs.h new file mode 100644 index 000000000000..666f8db2ba92 --- /dev/null +++ b/fs/nfs/sysfs.h @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2019 Hammerspace Inc + */ + +#ifndef __NFS_SYSFS_H +#define __NFS_SYSFS_H + + +extern struct kobject *nfs_client_kobj; + +extern int nfs_sysfs_init(void); +extern void nfs_sysfs_exit(void); + +#endif -- cgit v1.2.3-59-g8ed1b From 10b7a70cbb81a62353f763c3343ac0c48410696f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 6 Feb 2019 09:32:50 -0500 Subject: NFS: Cleanup - add nfs_clients_exit to mirror nfs_clients_init Add a helper to clean up the struct nfs_net when it is being destroyed. Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 13 +++++++++++-- fs/nfs/inode.c | 6 +----- fs/nfs/internal.h | 2 +- 3 files changed, 13 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index d7e4f0848e28..0acb104f7b00 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -192,7 +192,7 @@ error_0: EXPORT_SYMBOL_GPL(nfs_alloc_client); #if IS_ENABLED(CONFIG_NFS_V4) -void nfs_cleanup_cb_ident_idr(struct net *net) +static void nfs_cleanup_cb_ident_idr(struct net *net) { struct nfs_net *nn = net_generic(net, nfs_net_id); @@ -214,7 +214,7 @@ static void pnfs_init_server(struct nfs_server *server) } #else -void nfs_cleanup_cb_ident_idr(struct net *net) +static void nfs_cleanup_cb_ident_idr(struct net *net) { } @@ -1074,6 +1074,15 @@ void nfs_clients_init(struct net *net) nn->boot_time = ktime_get_real(); } +void nfs_clients_exit(struct net *net) +{ + struct nfs_net *nn = net_generic(net, nfs_net_id); + + nfs_cleanup_cb_ident_idr(net); + WARN_ON_ONCE(!list_empty(&nn->nfs_client_list)); + WARN_ON_ONCE(!list_empty(&nn->nfs_volume_list)); +} + #ifdef CONFIG_PROC_FS static void *nfs_server_list_start(struct seq_file *p, loff_t *pos); static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 8dba56491de2..f4944ef306a8 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2161,12 +2161,8 @@ static int nfs_net_init(struct net *net) static void nfs_net_exit(struct net *net) { - struct nfs_net *nn = net_generic(net, nfs_net_id); - nfs_fs_proc_net_exit(net); - nfs_cleanup_cb_ident_idr(net); - WARN_ON_ONCE(!list_empty(&nn->nfs_client_list)); - WARN_ON_ONCE(!list_empty(&nn->nfs_volume_list)); + nfs_clients_exit(net); } static struct pernet_operations nfs_net_ops = { diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 498fab72f70b..9e87265907b8 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -158,6 +158,7 @@ extern void nfs_umount(const struct nfs_mount_request *info); /* client.c */ extern const struct rpc_program nfs_program; extern void nfs_clients_init(struct net *net); +extern void nfs_clients_exit(struct net *net); extern struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *); int nfs_create_rpc_client(struct nfs_client *, const struct nfs_client_initdata *, rpc_authflavor_t); struct nfs_client *nfs_get_client(const struct nfs_client_initdata *); @@ -170,7 +171,6 @@ int nfs_init_server_rpcclient(struct nfs_server *, const struct rpc_timeout *t, struct nfs_server *nfs_alloc_server(void); void nfs_server_copy_userdata(struct nfs_server *, struct nfs_server *); -extern void nfs_cleanup_cb_ident_idr(struct net *); extern void nfs_put_client(struct nfs_client *); extern void nfs_free_client(struct nfs_client *); extern struct nfs_client *nfs4_find_client_ident(struct net *, int); -- cgit v1.2.3-59-g8ed1b From 1c341b777501613aad83f9c233a3fe5701cff083 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 22 May 2019 08:38:57 -0400 Subject: NFS: Add deferred cache invalidation for close-to-open consistency violations If the client detects that close-to-open cache consistency has been violated, and that the file or directory has been changed on the server, then do a cache invalidation when we're done working with the file. The reason we don't do an immediate cache invalidation is that we want to avoid performance problems due to false positives. Also, note that we cannot guarantee cache consistency in this situation even if we do invalidate the cache. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 4 ++++ fs/nfs/inode.c | 15 +++++++++++---- include/linux/nfs_fs.h | 2 ++ 3 files changed, 17 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 57b6a45576ad..bd1f9555447b 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -80,6 +80,10 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir ctx->dup_cookie = 0; ctx->cred = get_cred(cred); spin_lock(&dir->i_lock); + if (list_empty(&nfsi->open_files) && + (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER)) + nfsi->cache_validity |= NFS_INO_INVALID_DATA | + NFS_INO_REVAL_FORCED; list_add(&ctx->list, &nfsi->open_files); spin_unlock(&dir->i_lock); return ctx; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 53777813ca95..ea52c71534b5 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -208,7 +208,7 @@ static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) } if (inode->i_mapping->nrpages == 0) - flags &= ~NFS_INO_INVALID_DATA; + flags &= ~(NFS_INO_INVALID_DATA|NFS_INO_DATA_INVAL_DEFER); nfsi->cache_validity |= flags; if (flags & NFS_INO_INVALID_DATA) nfs_fscache_invalidate(inode); @@ -652,7 +652,8 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset) i_size_write(inode, offset); /* Optimisation */ if (offset == 0) - NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_DATA; + NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_DATA | + NFS_INO_DATA_INVAL_DEFER); NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE; spin_unlock(&inode->i_lock); @@ -1032,6 +1033,10 @@ void nfs_inode_attach_open_context(struct nfs_open_context *ctx) struct nfs_inode *nfsi = NFS_I(inode); spin_lock(&inode->i_lock); + if (list_empty(&nfsi->open_files) && + (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER)) + nfsi->cache_validity |= NFS_INO_INVALID_DATA | + NFS_INO_REVAL_FORCED; list_add_tail_rcu(&ctx->list, &nfsi->open_files); spin_unlock(&inode->i_lock); } @@ -1313,7 +1318,8 @@ int nfs_revalidate_mapping(struct inode *inode, set_bit(NFS_INO_INVALIDATING, bitlock); smp_wmb(); - nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; + nfsi->cache_validity &= ~(NFS_INO_INVALID_DATA| + NFS_INO_DATA_INVAL_DEFER); spin_unlock(&inode->i_lock); trace_nfs_invalidate_mapping_enter(inode); ret = nfs_invalidate_mapping(inode, mapping); @@ -1871,7 +1877,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) dprintk("NFS: change_attr change on server for file %s/%ld\n", inode->i_sb->s_id, inode->i_ino); - } + } else if (!have_delegation) + nfsi->cache_validity |= NFS_INO_DATA_INVAL_DEFER; inode_set_iversion_raw(inode, fattr->change_attr); attr_changed = true; } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index d363d5765cdf..0a11712a80e3 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -223,6 +223,8 @@ struct nfs4_copy_state { #define NFS_INO_INVALID_MTIME BIT(10) /* cached mtime is invalid */ #define NFS_INO_INVALID_SIZE BIT(11) /* cached size is invalid */ #define NFS_INO_INVALID_OTHER BIT(12) /* other attrs are invalid */ +#define NFS_INO_DATA_INVAL_DEFER \ + BIT(13) /* Deferred cache invalidation */ #define NFS_INO_INVALID_ATTR (NFS_INO_INVALID_CHANGE \ | NFS_INO_INVALID_CTIME \ -- cgit v1.2.3-59-g8ed1b From bf11fbdb20b385157b046ea7781f04d0c62554a3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 29 Jan 2019 21:40:10 -0500 Subject: NFS: Add sysfs support for per-container identifier In order to identify containers to the NFS client, we add a per-net sysfs attribute that udev can fill with the appropriate identifier. The identifier could be a unique hostname, but in most cases it will probably be a persisted uuid. Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 4 ++ fs/nfs/netns.h | 3 ++ fs/nfs/sysfs.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/sysfs.h | 10 +++++ 4 files changed, 135 insertions(+) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 0acb104f7b00..6e7aeb543f6a 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -49,6 +49,7 @@ #include "pnfs.h" #include "nfs.h" #include "netns.h" +#include "sysfs.h" #define NFSDBG_FACILITY NFSDBG_CLIENT @@ -1072,12 +1073,15 @@ void nfs_clients_init(struct net *net) #endif spin_lock_init(&nn->nfs_client_lock); nn->boot_time = ktime_get_real(); + + nfs_netns_sysfs_setup(nn, net); } void nfs_clients_exit(struct net *net) { struct nfs_net *nn = net_generic(net, nfs_net_id); + nfs_netns_sysfs_destroy(nn); nfs_cleanup_cb_ident_idr(net); WARN_ON_ONCE(!list_empty(&nn->nfs_client_list)); WARN_ON_ONCE(!list_empty(&nn->nfs_volume_list)); diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index fc9978c58265..c8374f74dce1 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -15,6 +15,8 @@ struct bl_dev_msg { uint32_t major, minor; }; +struct nfs_netns_client; + struct nfs_net { struct cache_detail *nfs_dns_resolve; struct rpc_pipe *bl_device_pipe; @@ -29,6 +31,7 @@ struct nfs_net { unsigned short nfs_callback_tcpport6; int cb_users[NFS4_MAX_MINOR_VERSION + 1]; #endif + struct nfs_netns_client *nfs_client; spinlock_t nfs_client_lock; ktime_t boot_time; #ifdef CONFIG_PROC_FS diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c index 7070711ff6c5..4f3390b20239 100644 --- a/fs/nfs/sysfs.c +++ b/fs/nfs/sysfs.c @@ -9,7 +9,12 @@ #include #include #include +#include +#include +#include +#include "nfs4_fs.h" +#include "netns.h" #include "sysfs.h" struct kobject *nfs_client_kobj; @@ -67,3 +72,116 @@ void nfs_sysfs_exit(void) kobject_put(nfs_client_kobj); kset_unregister(nfs_client_kset); } + +static ssize_t nfs_netns_identifier_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct nfs_netns_client *c = container_of(kobj, + struct nfs_netns_client, + kobject); + return scnprintf(buf, PAGE_SIZE, "%s\n", c->identifier); +} + +/* Strip trailing '\n' */ +static size_t nfs_string_strip(const char *c, size_t len) +{ + while (len > 0 && c[len-1] == '\n') + --len; + return len; +} + +static ssize_t nfs_netns_identifier_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct nfs_netns_client *c = container_of(kobj, + struct nfs_netns_client, + kobject); + const char *old; + char *p; + size_t len; + + len = nfs_string_strip(buf, min_t(size_t, count, CONTAINER_ID_MAXLEN)); + if (!len) + return 0; + p = kmemdup_nul(buf, len, GFP_KERNEL); + if (!p) + return -ENOMEM; + old = xchg(&c->identifier, p); + if (old) { + synchronize_rcu(); + kfree(old); + } + return count; +} + +static void nfs_netns_client_release(struct kobject *kobj) +{ + struct nfs_netns_client *c = container_of(kobj, + struct nfs_netns_client, + kobject); + + if (c->identifier) + kfree(c->identifier); + kfree(c); +} + +static const void *nfs_netns_client_namespace(struct kobject *kobj) +{ + return container_of(kobj, struct nfs_netns_client, kobject)->net; +} + +static struct kobj_attribute nfs_netns_client_id = __ATTR(identifier, + 0644, nfs_netns_identifier_show, nfs_netns_identifier_store); + +static struct attribute *nfs_netns_client_attrs[] = { + &nfs_netns_client_id.attr, + NULL, +}; + +static struct kobj_type nfs_netns_client_type = { + .release = nfs_netns_client_release, + .default_attrs = nfs_netns_client_attrs, + .sysfs_ops = &kobj_sysfs_ops, + .namespace = nfs_netns_client_namespace, +}; + +static struct nfs_netns_client *nfs_netns_client_alloc(struct kobject *parent, + struct net *net) +{ + struct nfs_netns_client *p; + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (p) { + p->net = net; + p->kobject.kset = nfs_client_kset; + if (kobject_init_and_add(&p->kobject, &nfs_netns_client_type, + parent, "nfs_client") == 0) + return p; + kobject_put(&p->kobject); + } + return NULL; +} + +void nfs_netns_sysfs_setup(struct nfs_net *netns, struct net *net) +{ + struct nfs_netns_client *clp; + + clp = nfs_netns_client_alloc(nfs_client_kobj, net); + if (clp) { + netns->nfs_client = clp; + kobject_uevent(&clp->kobject, KOBJ_ADD); + } +} + +void nfs_netns_sysfs_destroy(struct nfs_net *netns) +{ + struct nfs_netns_client *clp = netns->nfs_client; + + if (clp) { + kobject_uevent(&clp->kobject, KOBJ_REMOVE); + kobject_del(&clp->kobject); + kobject_put(&clp->kobject); + netns->nfs_client = NULL; + } +} diff --git a/fs/nfs/sysfs.h b/fs/nfs/sysfs.h index 666f8db2ba92..f1b27411dcc0 100644 --- a/fs/nfs/sysfs.h +++ b/fs/nfs/sysfs.h @@ -6,10 +6,20 @@ #ifndef __NFS_SYSFS_H #define __NFS_SYSFS_H +#define CONTAINER_ID_MAXLEN (64) + +struct nfs_netns_client { + struct kobject kobject; + struct net *net; + const char *identifier; +}; extern struct kobject *nfs_client_kobj; extern int nfs_sysfs_init(void); extern void nfs_sysfs_exit(void); +void nfs_netns_sysfs_setup(struct nfs_net *netns, struct net *net); +void nfs_netns_sysfs_destroy(struct nfs_net *netns); + #endif -- cgit v1.2.3-59-g8ed1b From 28cc5cd8c68f1cd2146d88c7a54cc3234ed4dabe Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 26 Apr 2017 23:30:48 -0400 Subject: NFS: Add a mount option to specify number of TCP connections to use Allow the user to specify that the client should use multiple connections to the server. For the moment, this functionality will be limited to TCP and to NFSv4.x (x>0). Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 1 + fs/nfs/super.c | 10 ++++++++++ 2 files changed, 11 insertions(+) (limited to 'fs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 498fab72f70b..bba09dace5d6 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -123,6 +123,7 @@ struct nfs_parsed_mount_data { char *export_path; int port; unsigned short protocol; + unsigned short nconnect; } nfs_server; void *lsm_opts; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index f88ddac2dcdf..68ad0b8bcabd 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -77,6 +77,8 @@ #define NFS_DEFAULT_VERSION 2 #endif +#define NFS_MAX_CONNECTIONS 16 + enum { /* Mount options that take no arguments */ Opt_soft, Opt_softerr, Opt_hard, @@ -108,6 +110,7 @@ enum { Opt_nfsvers, Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, Opt_addr, Opt_mountaddr, Opt_clientaddr, + Opt_nconnect, Opt_lookupcache, Opt_fscache_uniq, Opt_local_lock, @@ -181,6 +184,8 @@ static const match_table_t nfs_mount_option_tokens = { { Opt_mounthost, "mounthost=%s" }, { Opt_mountaddr, "mountaddr=%s" }, + { Opt_nconnect, "nconnect=%s" }, + { Opt_lookupcache, "lookupcache=%s" }, { Opt_fscache_uniq, "fsc=%s" }, { Opt_local_lock, "local_lock=%s" }, @@ -1549,6 +1554,11 @@ static int nfs_parse_mount_options(char *raw, if (mnt->mount_server.addrlen == 0) goto out_invalid_address; break; + case Opt_nconnect: + if (nfs_get_option_ul_bound(args, &option, 1, NFS_MAX_CONNECTIONS)) + goto out_invalid_value; + mnt->nfs_server.nconnect = option; + break; case Opt_lookupcache: string = match_strdup(args); if (string == NULL) -- cgit v1.2.3-59-g8ed1b From 6619079d05404cb32be29af329b87ac3b0ad4f96 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 27 Apr 2017 11:13:40 -0400 Subject: NFSv4: Allow multiple connections to NFSv4.x (x>0) servers If the user specifies the -onconn= mount option, and the transport protocol is TCP, then set up connections to the server. The connections will all go to the same IP address. Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 2 ++ fs/nfs/internal.h | 1 + fs/nfs/nfs4client.c | 11 +++++++++-- include/linux/nfs_fs_sb.h | 1 + 4 files changed, 13 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index d7e4f0848e28..fa6953e56a71 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -175,6 +175,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) clp->cl_rpcclient = ERR_PTR(-EINVAL); clp->cl_proto = cl_init->proto; + clp->cl_nconnect = cl_init->nconnect; clp->cl_net = get_net(cl_init->net); clp->cl_principal = "*"; @@ -493,6 +494,7 @@ int nfs_create_rpc_client(struct nfs_client *clp, struct rpc_create_args args = { .net = clp->cl_net, .protocol = clp->cl_proto, + .nconnect = clp->cl_nconnect, .address = (struct sockaddr *)&clp->cl_addr, .addrsize = clp->cl_addrlen, .timeout = cl_init->timeparms, diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index bba09dace5d6..4a49dc1495c5 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -82,6 +82,7 @@ struct nfs_client_initdata { struct nfs_subversion *nfs_mod; int proto; u32 minorversion; + unsigned int nconnect; struct net *net; const struct rpc_timeout *timeparms; const struct cred *cred; diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 81b9b6d7927a..5c244c440658 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -859,7 +859,8 @@ static int nfs4_set_client(struct nfs_server *server, const size_t addrlen, const char *ip_addr, int proto, const struct rpc_timeout *timeparms, - u32 minorversion, struct net *net) + u32 minorversion, unsigned int nconnect, + struct net *net) { struct nfs_client_initdata cl_init = { .hostname = hostname, @@ -875,6 +876,8 @@ static int nfs4_set_client(struct nfs_server *server, }; struct nfs_client *clp; + if (minorversion > 0 && proto == XPRT_TRANSPORT_TCP) + cl_init.nconnect = nconnect; if (server->flags & NFS_MOUNT_NORESVPORT) set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); if (server->options & NFS_OPTION_MIGRATION) @@ -1074,6 +1077,7 @@ static int nfs4_init_server(struct nfs_server *server, data->nfs_server.protocol, &timeparms, data->minorversion, + data->nfs_server.nconnect, data->net); if (error < 0) return error; @@ -1163,6 +1167,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, XPRT_TRANSPORT_RDMA, parent_server->client->cl_timeout, parent_client->cl_mvops->minor_version, + parent_client->cl_nconnect, parent_client->cl_net); if (!error) goto init_server; @@ -1176,6 +1181,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, XPRT_TRANSPORT_TCP, parent_server->client->cl_timeout, parent_client->cl_mvops->minor_version, + parent_client->cl_nconnect, parent_client->cl_net); if (error < 0) goto error; @@ -1271,7 +1277,8 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname, set_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status); error = nfs4_set_client(server, hostname, sap, salen, buf, clp->cl_proto, clnt->cl_timeout, - clp->cl_minorversion, net); + clp->cl_minorversion, + clp->cl_nconnect, net); clear_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status); if (error != 0) { nfs_server_insert_lists(server); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 1e78032a174b..a87fe854f008 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -58,6 +58,7 @@ struct nfs_client { struct nfs_subversion * cl_nfs_mod; /* pointer to nfs version module */ u32 cl_minorversion;/* NFSv4 minorversion */ + unsigned int cl_nconnect; /* Number of connections */ const char * cl_principal; /* used for machine cred */ #if IS_ENABLED(CONFIG_NFS_V4) -- cgit v1.2.3-59-g8ed1b From bb71e4a5d7eb151aa8d4e98c628b744f78af7c57 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 27 Apr 2017 11:22:07 -0400 Subject: pNFS: Allow multiple connections to the DS If the user specifies -onconnect= mount option, and the transport protocol is TCP, then set up connections to the pNFS data server as well. The connections will all go to the same IP address. Signed-off-by: Trond Myklebust --- fs/nfs/nfs3client.c | 3 +++ fs/nfs/nfs4client.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'fs') diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c index fb0c425b5d45..148ceb74d27c 100644 --- a/fs/nfs/nfs3client.c +++ b/fs/nfs/nfs3client.c @@ -102,6 +102,9 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, return ERR_PTR(-EINVAL); cl_init.hostname = buf; + if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP) + cl_init.nconnect = mds_clp->cl_nconnect; + if (mds_srv->flags & NFS_MOUNT_NORESVPORT) set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 5c244c440658..616393a01c06 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -944,6 +944,9 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, return ERR_PTR(-EINVAL); cl_init.hostname = buf; + if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP) + cl_init.nconnect = mds_clp->cl_nconnect; + if (mds_srv->flags & NFS_MOUNT_NORESVPORT) __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); -- cgit v1.2.3-59-g8ed1b From fd87c8b73a0c7689cf3112b35c8c173e8326a5fb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 27 Apr 2017 11:40:04 -0400 Subject: NFS: Display the "nconnect" mount option if it is set. Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 68ad0b8bcabd..bd3ba1d323ea 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -678,6 +678,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, seq_printf(m, ",proto=%s", rpc_peeraddr2str(nfss->client, RPC_DISPLAY_NETID)); rcu_read_unlock(); + if (clp->cl_nconnect > 0) + seq_printf(m, ",nconnect=%u", clp->cl_nconnect); if (version == 4) { if (nfss->port != NFS_PORT) seq_printf(m, ",port=%u", nfss->port); -- cgit v1.2.3-59-g8ed1b From 53c326307156249253be36b33cd6c32f4a24f9b2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 17 Sep 2018 13:32:02 -0400 Subject: NFS: Allow multiple connections to a NFSv2 or NFSv3 server Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index fa6953e56a71..f6ec884398bd 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -660,6 +660,7 @@ static int nfs_init_server(struct nfs_server *server, .net = data->net, .timeparms = &timeparms, .cred = server->cred, + .nconnect = data->nfs_server.nconnect, }; struct nfs_client *clp; int error; -- cgit v1.2.3-59-g8ed1b From 5a0c257f8e0f4c4b3c33dff545317c21a921303e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 30 May 2019 10:41:28 +1000 Subject: NFS: send state management on a single connection. With NFSv4.1, different network connections need to be explicitly bound to a session. During session startup, this is not possible so only a single connection must be used for session startup. So add a task flag to disable the default round-robin choice of connections (when nconnect > 1) and force the use of a single connection. Then use that flag on all requests for session management - for consistence, include NFSv4.0 management (SETCLIENTID) and session destruction Reported-by: Chuck Lever Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 22 +++++++++++++--------- include/linux/sunrpc/sched.h | 1 + net/sunrpc/clnt.c | 24 +++++++++++++++++++++++- 3 files changed, 37 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 26626ea1f197..d115d9973efc 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5992,7 +5992,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, .rpc_message = &msg, .callback_ops = &nfs4_setclientid_ops, .callback_data = &setclientid, - .flags = RPC_TASK_TIMEOUT, + .flags = RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN, }; int status; @@ -6058,7 +6058,8 @@ int nfs4_proc_setclientid_confirm(struct nfs_client *clp, dprintk("NFS call setclientid_confirm auth=%s, (client ID %llx)\n", clp->cl_rpcclient->cl_auth->au_ops->au_name, clp->cl_clientid); - status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + status = rpc_call_sync(clp->cl_rpcclient, &msg, + RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN); trace_nfs4_setclientid_confirm(clp, status); dprintk("NFS reply setclientid_confirm: %d\n", status); return status; @@ -7639,7 +7640,7 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct NFS_SP4_MACH_CRED_SECINFO, &clnt, &msg); status = nfs4_call_sync(clnt, NFS_SERVER(dir), &msg, &args.seq_args, - &res.seq_res, 0); + &res.seq_res, RPC_TASK_NO_ROUND_ROBIN); dprintk("NFS reply secinfo: %d\n", status); put_cred(cred); @@ -7977,7 +7978,7 @@ nfs4_run_exchange_id(struct nfs_client *clp, const struct cred *cred, .rpc_client = clp->cl_rpcclient, .callback_ops = &nfs4_exchange_id_call_ops, .rpc_message = &msg, - .flags = RPC_TASK_TIMEOUT, + .flags = RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN, }; struct nfs41_exchange_id_data *calldata; int status; @@ -8202,7 +8203,8 @@ static int _nfs4_proc_destroy_clientid(struct nfs_client *clp, }; int status; - status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + status = rpc_call_sync(clp->cl_rpcclient, &msg, + RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN); trace_nfs4_destroy_clientid(clp, status); if (status) dprintk("NFS: Got error %d from the server %s on " @@ -8481,7 +8483,8 @@ static int _nfs4_proc_create_session(struct nfs_client *clp, nfs4_init_channel_attrs(&args, clp->cl_rpcclient); args.flags = (SESSION4_PERSIST | SESSION4_BACK_CHAN); - status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + status = rpc_call_sync(session->clp->cl_rpcclient, &msg, + RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN); trace_nfs4_create_session(clp, status); switch (status) { @@ -8557,7 +8560,8 @@ int nfs4_proc_destroy_session(struct nfs4_session *session, if (!test_and_clear_bit(NFS4_SESSION_ESTABLISHED, &session->session_state)) return 0; - status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + status = rpc_call_sync(session->clp->cl_rpcclient, &msg, + RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN); trace_nfs4_destroy_session(session->clp, status); if (status) @@ -8811,7 +8815,7 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp, .rpc_client = clp->cl_rpcclient, .rpc_message = &msg, .callback_ops = &nfs4_reclaim_complete_call_ops, - .flags = RPC_TASK_ASYNC, + .flags = RPC_TASK_ASYNC | RPC_TASK_NO_ROUND_ROBIN, }; int status = -ENOMEM; @@ -9330,7 +9334,7 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, dprintk("--> %s\n", __func__); status = nfs4_call_sync(clnt, server, &msg, &args.seq_args, - &res.seq_res, 0); + &res.seq_res, RPC_TASK_NO_ROUND_ROBIN); dprintk("<-- %s status=%d\n", __func__, status); put_cred(cred); diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index d0e451868f02..11424bdf09e6 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -126,6 +126,7 @@ struct rpc_task_setup { #define RPC_CALL_MAJORSEEN 0x0020 /* major timeout seen */ #define RPC_TASK_ROOTCREDS 0x0040 /* force root creds */ #define RPC_TASK_DYNAMIC 0x0080 /* task was kmalloc'ed */ +#define RPC_TASK_NO_ROUND_ROBIN 0x0100 /* send requests on "main" xprt */ #define RPC_TASK_SOFT 0x0200 /* Use soft timeouts */ #define RPC_TASK_SOFTCONN 0x0400 /* Fail if can't connect */ #define RPC_TASK_SENT 0x0800 /* message was sent */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b6aca8cb5ae6..d599fab8adcb 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -995,6 +995,24 @@ rpc_task_get_xprt(struct rpc_clnt *clnt) return xprt; } +static struct rpc_xprt * +rpc_task_get_first_xprt(struct rpc_clnt *clnt) +{ + struct rpc_xprt_switch *xps; + struct rpc_xprt *xprt; + + rcu_read_lock(); + xprt = xprt_get(rcu_dereference(clnt->cl_xprt)); + if (xprt) { + atomic_long_inc(&xprt->queuelen); + xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch); + atomic_long_inc(&xps->xps_queuelen); + } + rcu_read_unlock(); + + return xprt; +} + static void rpc_task_release_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt) { @@ -1042,7 +1060,11 @@ void rpc_task_release_client(struct rpc_task *task) static void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt) { - if (!task->tk_xprt) + if (task->tk_xprt) + return; + if (task->tk_flags & RPC_TASK_NO_ROUND_ROBIN) + task->tk_xprt = rpc_task_get_first_xprt(clnt); + else task->tk_xprt = rpc_task_get_xprt(clnt); } -- cgit v1.2.3-59-g8ed1b From 2b17d725f9be59a1bfa0583af690c463fca1f385 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 11 Jun 2019 16:49:52 -0400 Subject: NFS: Clean up writeback code Now that the VM promises never to recurse back into the filesystem layer on writeback, remove all the GFP_NOFS references etc from the generic writeback code. Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 6 ++---- fs/nfs/pnfs.c | 2 +- fs/nfs/write.c | 7 ++----- 3 files changed, 5 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 6ef5278326b6..ed4e1b07447b 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -77,7 +77,7 @@ void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos) static inline struct nfs_page * nfs_page_alloc(void) { - struct nfs_page *p = kmem_cache_zalloc(nfs_page_cachep, GFP_NOIO); + struct nfs_page *p = kmem_cache_zalloc(nfs_page_cachep, GFP_KERNEL); if (p) INIT_LIST_HEAD(&p->wb_list); return p; @@ -775,8 +775,6 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, if (pagecount <= ARRAY_SIZE(pg_array->page_array)) pg_array->pagevec = pg_array->page_array; else { - if (hdr->rw_mode == FMODE_WRITE) - gfp_flags = GFP_NOIO; pg_array->pagevec = kcalloc(pagecount, sizeof(struct page *), gfp_flags); if (!pg_array->pagevec) { pg_array->npages = 0; @@ -851,7 +849,7 @@ nfs_pageio_alloc_mirrors(struct nfs_pageio_descriptor *desc, desc->pg_mirrors_dynamic = NULL; if (mirror_count == 1) return desc->pg_mirrors_static; - ret = kmalloc_array(mirror_count, sizeof(*ret), GFP_NOFS); + ret = kmalloc_array(mirror_count, sizeof(*ret), GFP_KERNEL); if (ret != NULL) { for (i = 0; i < mirror_count; i++) nfs_pageio_mirror_init(&ret[i], desc->pg_bsize); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 83722e936b4a..56e423cd8180 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2468,7 +2468,7 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, wb_size, IOMODE_RW, false, - GFP_NOFS); + GFP_KERNEL); if (IS_ERR(pgio->pg_lseg)) { pgio->pg_error = PTR_ERR(pgio->pg_lseg); pgio->pg_lseg = NULL; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 059a7c38bc4f..92d9cadc6102 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -103,7 +103,7 @@ EXPORT_SYMBOL_GPL(nfs_commit_free); static struct nfs_pgio_header *nfs_writehdr_alloc(void) { - struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); + struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_KERNEL); memset(p, 0, sizeof(*p)); p->rw_mode = FMODE_WRITE; @@ -721,12 +721,11 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) struct inode *inode = mapping->host; struct nfs_pageio_descriptor pgio; struct nfs_io_completion *ioc; - unsigned int pflags = memalloc_nofs_save(); int err; nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); - ioc = nfs_io_completion_alloc(GFP_NOFS); + ioc = nfs_io_completion_alloc(GFP_KERNEL); if (ioc) nfs_io_completion_init(ioc, nfs_io_completion_commit, inode); @@ -737,8 +736,6 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) nfs_pageio_complete(&pgio); nfs_io_completion_put(ioc); - memalloc_nofs_restore(pflags); - if (err < 0) goto out_err; err = pgio.pg_error; -- cgit v1.2.3-59-g8ed1b From 1a7441b282cef3bb94acaba9f474ebc6d22277ba Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Fri, 17 May 2019 17:06:41 -0400 Subject: NFSv4: Add lease_time and lease_expired to 'nfs4:' line of mountstats On the NFS client there is no low-impact way to determine the nfs4 lease time or whether the lease is expired, so add these to mountstats with times displayed in seconds. If the lease is not expired, display lease_expired=0. Otherwise, display lease_expired=seconds_since_expired, similar to 'age:' line in mountstats. Signed-off-by: Dave Wysochanski Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index bd3ba1d323ea..d01def299d40 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -742,6 +742,16 @@ int nfs_show_options(struct seq_file *m, struct dentry *root) EXPORT_SYMBOL_GPL(nfs_show_options); #if IS_ENABLED(CONFIG_NFS_V4) +static void show_lease(struct seq_file *m, struct nfs_server *server) +{ + struct nfs_client *clp = server->nfs_client; + unsigned long expire; + + seq_printf(m, ",lease_time=%ld", clp->cl_lease_time / HZ); + expire = clp->cl_last_renewal + clp->cl_lease_time; + seq_printf(m, ",lease_expired=%ld", + time_after(expire, jiffies) ? 0 : (jiffies - expire) / HZ); +} #ifdef CONFIG_NFS_V4_1 static void show_sessions(struct seq_file *m, struct nfs_server *server) { @@ -850,6 +860,7 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root) seq_printf(m, ",acl=0x%x", nfss->acl_bitmask); show_sessions(m, nfss); show_pnfs(m, nfss); + show_lease(m, nfss); } #endif -- cgit v1.2.3-59-g8ed1b From 9026b3a973b0b0b73c15ba40aff87cd0959fd0f3 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 31 May 2019 14:06:05 -0700 Subject: nfs: disable client side deduplication The NFS protocol doesn't support deduplication, so turn it off again. Fixes: ce96e888fe48e ("Fix nfs4.2 return -EINVAL when do dedupe operation") Signed-off-by: Darrick J. Wong Signed-off-by: Trond Myklebust --- fs/nfs/nfs4file.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 3a507c42c1ca..52dbd17e401e 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -187,7 +187,11 @@ static loff_t nfs42_remap_file_range(struct file *src_file, loff_t src_off, bool same_inode = false; int ret; - if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) + /* NFS does not support deduplication. */ + if (remap_flags & REMAP_FILE_DEDUP) + return -EOPNOTSUPP; + + if (remap_flags & ~REMAP_FILE_ADVISORY) return -EINVAL; /* check alignment w.r.t. clone_blksize */ -- cgit v1.2.3-59-g8ed1b From 9f7761cf0409465075dadb875d5d4b8ef2f890c8 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Tue, 11 Jun 2019 12:57:52 -0400 Subject: NFS: Cleanup if nfs_match_client is interrupted Don't bail out before cleaning up a new allocation if the wait for searching for a matching nfs client is interrupted. Memory leaks. Reported-by: syzbot+7fe11b49c1cc30e3fce2@syzkaller.appspotmail.com Fixes: 950a578c6128 ("NFS: make nfs_match_client killable") Signed-off-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index e4fa0a5fd183..30838304a0bf 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -408,10 +408,10 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init) clp = nfs_match_client(cl_init); if (clp) { spin_unlock(&nn->nfs_client_lock); - if (IS_ERR(clp)) - return clp; if (new) new->rpc_ops->free_client(new); + if (IS_ERR(clp)) + return clp; return nfs_found_client(cl_init, clp); } if (new) { -- cgit v1.2.3-59-g8ed1b From c5833f0dc4cef778a718ad7d00df38e199045997 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 19 Jun 2019 10:33:47 -0400 Subject: NFS4: Add a trace event to record invalid CB sequence IDs Help debug NFSv4 callback failures. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/callback_proc.c | 28 ++++++++++++++++++++-------- fs/nfs/nfs4trace.h | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 315967354954..f39924ba050b 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -414,27 +414,39 @@ static __be32 validate_seqid(const struct nfs4_slot_table *tbl, const struct nfs4_slot *slot, const struct cb_sequenceargs * args) { + __be32 ret; + + ret = cpu_to_be32(NFS4ERR_BADSLOT); if (args->csa_slotid > tbl->server_highest_slotid) - return htonl(NFS4ERR_BADSLOT); + goto out_err; /* Replay */ if (args->csa_sequenceid == slot->seq_nr) { + ret = cpu_to_be32(NFS4ERR_DELAY); if (nfs4_test_locked_slot(tbl, slot->slot_nr)) - return htonl(NFS4ERR_DELAY); + goto out_err; + /* Signal process_op to set this error on next op */ + ret = cpu_to_be32(NFS4ERR_RETRY_UNCACHED_REP); if (args->csa_cachethis == 0) - return htonl(NFS4ERR_RETRY_UNCACHED_REP); + goto out_err; /* Liar! We never allowed you to set csa_cachethis != 0 */ - return htonl(NFS4ERR_SEQ_FALSE_RETRY); + ret = cpu_to_be32(NFS4ERR_SEQ_FALSE_RETRY); + goto out_err; } /* Note: wraparound relies on seq_nr being of type u32 */ - if (likely(args->csa_sequenceid == slot->seq_nr + 1)) - return htonl(NFS4_OK); - /* Misordered request */ - return htonl(NFS4ERR_SEQ_MISORDERED); + ret = cpu_to_be32(NFS4ERR_SEQ_MISORDERED); + if (args->csa_sequenceid != slot->seq_nr + 1) + goto out_err; + + return cpu_to_be32(NFS4_OK); + +out_err: + trace_nfs4_cb_seqid_err(args, ret); + return ret; } /* diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index cd1a5c08da9a..6beb1f29134b 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -490,6 +490,44 @@ TRACE_EVENT(nfs4_cb_sequence, __entry->highest_slotid ) ); + +TRACE_EVENT(nfs4_cb_seqid_err, + TP_PROTO( + const struct cb_sequenceargs *args, + __be32 status + ), + TP_ARGS(args, status), + + TP_STRUCT__entry( + __field(unsigned int, session) + __field(unsigned int, slot_nr) + __field(unsigned int, seq_nr) + __field(unsigned int, highest_slotid) + __field(unsigned int, cachethis) + __field(int, error) + ), + + TP_fast_assign( + __entry->session = nfs_session_id_hash(&args->csa_sessionid); + __entry->slot_nr = args->csa_slotid; + __entry->seq_nr = args->csa_sequenceid; + __entry->highest_slotid = args->csa_highestslotid; + __entry->cachethis = args->csa_cachethis; + __entry->error = -be32_to_cpu(status); + ), + + TP_printk( + "error=%d (%s) session=0x%08x slot_nr=%u seq_nr=%u " + "highest_slotid=%u", + __entry->error, + show_nfsv4_errors(__entry->error), + __entry->session, + __entry->slot_nr, + __entry->seq_nr, + __entry->highest_slotid + ) +); + #endif /* CONFIG_NFS_V4_1 */ TRACE_EVENT(nfs4_setup_sequence, -- cgit v1.2.3-59-g8ed1b From 96650e2effa2dbc9d5002fed0c2f8a72de082918 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 19 Jun 2019 10:33:53 -0400 Subject: NFS: Fix show_nfs_errors macros again I noticed that NFS status values stopped working again. trace_print_symbols_seq() takes an unsigned long. Passing a negative errno or negative NFSERR value just confuses it, and since we're using C macros here and not static inline functions, all bets are off due to implicit type conversion. Straight-line the calling conventions so that error codes are stored in the trace record as positive values in an unsigned long field, mapped to symbolic as an unsigned long, and displayed as a negative value, to continue to enable grepping on "error=-". It's often the case that an error value that is positive is a byte count but when it's negative, it's an error (e.g. nfs4_write). Fix those cases so that the value that is eventually stored in the error field is a positive NFS status or errno, or zero. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/nfs4trace.h | 164 ++++++++++++++++++++++++++--------------------------- 1 file changed, 82 insertions(+), 82 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 6beb1f29134b..9a39fd51c8e7 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -156,7 +156,7 @@ TRACE_DEFINE_ENUM(NFS4ERR_WRONG_TYPE); TRACE_DEFINE_ENUM(NFS4ERR_XDEV); #define show_nfsv4_errors(error) \ - __print_symbolic(-(error), \ + __print_symbolic(error, \ { NFS4_OK, "OK" }, \ /* Mapped by nfs4_stat_to_errno() */ \ { EPERM, "EPERM" }, \ @@ -348,7 +348,7 @@ DECLARE_EVENT_CLASS(nfs4_clientid_event, TP_STRUCT__entry( __string(dstaddr, clp->cl_hostname) - __field(int, error) + __field(unsigned long, error) ), TP_fast_assign( @@ -357,8 +357,8 @@ DECLARE_EVENT_CLASS(nfs4_clientid_event, ), TP_printk( - "error=%d (%s) dstaddr=%s", - __entry->error, + "error=%ld (%s) dstaddr=%s", + -__entry->error, show_nfsv4_errors(__entry->error), __get_str(dstaddr) ) @@ -420,7 +420,7 @@ TRACE_EVENT(nfs4_sequence_done, __field(unsigned int, highest_slotid) __field(unsigned int, target_highest_slotid) __field(unsigned int, status_flags) - __field(int, error) + __field(unsigned long, error) ), TP_fast_assign( @@ -435,10 +435,10 @@ TRACE_EVENT(nfs4_sequence_done, __entry->error = res->sr_status; ), TP_printk( - "error=%d (%s) session=0x%08x slot_nr=%u seq_nr=%u " + "error=%ld (%s) session=0x%08x slot_nr=%u seq_nr=%u " "highest_slotid=%u target_highest_slotid=%u " "status_flags=%u (%s)", - __entry->error, + -__entry->error, show_nfsv4_errors(__entry->error), __entry->session, __entry->slot_nr, @@ -467,7 +467,7 @@ TRACE_EVENT(nfs4_cb_sequence, __field(unsigned int, seq_nr) __field(unsigned int, highest_slotid) __field(unsigned int, cachethis) - __field(int, error) + __field(unsigned long, error) ), TP_fast_assign( @@ -476,13 +476,13 @@ TRACE_EVENT(nfs4_cb_sequence, __entry->seq_nr = args->csa_sequenceid; __entry->highest_slotid = args->csa_highestslotid; __entry->cachethis = args->csa_cachethis; - __entry->error = -be32_to_cpu(status); + __entry->error = be32_to_cpu(status); ), TP_printk( - "error=%d (%s) session=0x%08x slot_nr=%u seq_nr=%u " + "error=%ld (%s) session=0x%08x slot_nr=%u seq_nr=%u " "highest_slotid=%u", - __entry->error, + -__entry->error, show_nfsv4_errors(__entry->error), __entry->session, __entry->slot_nr, @@ -504,7 +504,7 @@ TRACE_EVENT(nfs4_cb_seqid_err, __field(unsigned int, seq_nr) __field(unsigned int, highest_slotid) __field(unsigned int, cachethis) - __field(int, error) + __field(unsigned long, error) ), TP_fast_assign( @@ -513,13 +513,13 @@ TRACE_EVENT(nfs4_cb_seqid_err, __entry->seq_nr = args->csa_sequenceid; __entry->highest_slotid = args->csa_highestslotid; __entry->cachethis = args->csa_cachethis; - __entry->error = -be32_to_cpu(status); + __entry->error = be32_to_cpu(status); ), TP_printk( - "error=%d (%s) session=0x%08x slot_nr=%u seq_nr=%u " + "error=%ld (%s) session=0x%08x slot_nr=%u seq_nr=%u " "highest_slotid=%u", - __entry->error, + -__entry->error, show_nfsv4_errors(__entry->error), __entry->session, __entry->slot_nr, @@ -572,18 +572,18 @@ TRACE_EVENT(nfs4_xdr_status, TP_STRUCT__entry( __field(u32, op) - __field(int, error) + __field(unsigned long, error) ), TP_fast_assign( __entry->op = op; - __entry->error = -error; + __entry->error = error; ), TP_printk( - "operation %d: nfs status %d (%s)", - __entry->op, - __entry->error, show_nfsv4_errors(__entry->error) + "error=%ld (%s) operation %d:", + -__entry->error, show_nfsv4_errors(__entry->error), + __entry->op ) ); @@ -597,7 +597,7 @@ DECLARE_EVENT_CLASS(nfs4_open_event, TP_ARGS(ctx, flags, error), TP_STRUCT__entry( - __field(int, error) + __field(unsigned long, error) __field(unsigned int, flags) __field(unsigned int, fmode) __field(dev_t, dev) @@ -615,7 +615,7 @@ DECLARE_EVENT_CLASS(nfs4_open_event, const struct nfs4_state *state = ctx->state; const struct inode *inode = NULL; - __entry->error = error; + __entry->error = -error; __entry->flags = flags; __entry->fmode = (__force unsigned int)ctx->mode; __entry->dev = ctx->dentry->d_sb->s_dev; @@ -647,11 +647,11 @@ DECLARE_EVENT_CLASS(nfs4_open_event, ), TP_printk( - "error=%d (%s) flags=%d (%s) fmode=%s " + "error=%ld (%s) flags=%d (%s) fmode=%s " "fileid=%02x:%02x:%llu fhandle=0x%08x " "name=%02x:%02x:%llu/%s stateid=%d:0x%08x " "openstateid=%d:0x%08x", - __entry->error, + -__entry->error, show_nfsv4_errors(__entry->error), __entry->flags, show_open_flags(__entry->flags), @@ -733,7 +733,7 @@ TRACE_EVENT(nfs4_close, __field(u32, fhandle) __field(u64, fileid) __field(unsigned int, fmode) - __field(int, error) + __field(unsigned long, error) __field(int, stateid_seq) __field(u32, stateid_hash) ), @@ -753,9 +753,9 @@ TRACE_EVENT(nfs4_close, ), TP_printk( - "error=%d (%s) fmode=%s fileid=%02x:%02x:%llu " + "error=%ld (%s) fmode=%s fileid=%02x:%02x:%llu " "fhandle=0x%08x openstateid=%d:0x%08x", - __entry->error, + -__entry->error, show_nfsv4_errors(__entry->error), __entry->fmode ? show_fmode_flags(__entry->fmode) : "closed", @@ -795,7 +795,7 @@ DECLARE_EVENT_CLASS(nfs4_lock_event, TP_ARGS(request, state, cmd, error), TP_STRUCT__entry( - __field(int, error) + __field(unsigned long, error) __field(int, cmd) __field(char, type) __field(loff_t, start) @@ -825,10 +825,10 @@ DECLARE_EVENT_CLASS(nfs4_lock_event, ), TP_printk( - "error=%d (%s) cmd=%s:%s range=%lld:%lld " + "error=%ld (%s) cmd=%s:%s range=%lld:%lld " "fileid=%02x:%02x:%llu fhandle=0x%08x " "stateid=%d:0x%08x", - __entry->error, + -__entry->error, show_nfsv4_errors(__entry->error), show_lock_cmd(__entry->cmd), show_lock_type(__entry->type), @@ -865,7 +865,7 @@ TRACE_EVENT(nfs4_set_lock, TP_ARGS(request, state, lockstateid, cmd, error), TP_STRUCT__entry( - __field(int, error) + __field(unsigned long, error) __field(int, cmd) __field(char, type) __field(loff_t, start) @@ -901,10 +901,10 @@ TRACE_EVENT(nfs4_set_lock, ), TP_printk( - "error=%d (%s) cmd=%s:%s range=%lld:%lld " + "error=%ld (%s) cmd=%s:%s range=%lld:%lld " "fileid=%02x:%02x:%llu fhandle=0x%08x " "stateid=%d:0x%08x lockstateid=%d:0x%08x", - __entry->error, + -__entry->error, show_nfsv4_errors(__entry->error), show_lock_cmd(__entry->cmd), show_lock_type(__entry->type), @@ -970,7 +970,7 @@ TRACE_EVENT(nfs4_delegreturn_exit, TP_STRUCT__entry( __field(dev_t, dev) __field(u32, fhandle) - __field(int, error) + __field(unsigned long, error) __field(int, stateid_seq) __field(u32, stateid_hash) ), @@ -986,9 +986,9 @@ TRACE_EVENT(nfs4_delegreturn_exit, ), TP_printk( - "error=%d (%s) dev=%02x:%02x fhandle=0x%08x " + "error=%ld (%s) dev=%02x:%02x fhandle=0x%08x " "stateid=%d:0x%08x", - __entry->error, + -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), __entry->fhandle, @@ -1007,7 +1007,7 @@ DECLARE_EVENT_CLASS(nfs4_test_stateid_event, TP_ARGS(state, lsp, error), TP_STRUCT__entry( - __field(int, error) + __field(unsigned long, error) __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) @@ -1029,9 +1029,9 @@ DECLARE_EVENT_CLASS(nfs4_test_stateid_event, ), TP_printk( - "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "stateid=%d:0x%08x", - __entry->error, + -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, @@ -1064,7 +1064,7 @@ DECLARE_EVENT_CLASS(nfs4_lookup_event, TP_STRUCT__entry( __field(dev_t, dev) - __field(int, error) + __field(unsigned long, error) __field(u64, dir) __string(name, name->name) ), @@ -1072,13 +1072,13 @@ DECLARE_EVENT_CLASS(nfs4_lookup_event, TP_fast_assign( __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); - __entry->error = error; + __entry->error = -error; __assign_str(name, name->name); ), TP_printk( - "error=%d (%s) name=%02x:%02x:%llu/%s", - __entry->error, + "error=%ld (%s) name=%02x:%02x:%llu/%s", + -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, @@ -1114,7 +1114,7 @@ TRACE_EVENT(nfs4_lookupp, TP_STRUCT__entry( __field(dev_t, dev) __field(u64, ino) - __field(int, error) + __field(unsigned long, error) ), TP_fast_assign( @@ -1124,8 +1124,8 @@ TRACE_EVENT(nfs4_lookupp, ), TP_printk( - "error=%d (%s) inode=%02x:%02x:%llu", - __entry->error, + "error=%ld (%s) inode=%02x:%02x:%llu", + -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->ino @@ -1145,7 +1145,7 @@ TRACE_EVENT(nfs4_rename, TP_STRUCT__entry( __field(dev_t, dev) - __field(int, error) + __field(unsigned long, error) __field(u64, olddir) __string(oldname, oldname->name) __field(u64, newdir) @@ -1162,9 +1162,9 @@ TRACE_EVENT(nfs4_rename, ), TP_printk( - "error=%d (%s) oldname=%02x:%02x:%llu/%s " + "error=%ld (%s) oldname=%02x:%02x:%llu/%s " "newname=%02x:%02x:%llu/%s", - __entry->error, + -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->olddir, @@ -1187,19 +1187,19 @@ DECLARE_EVENT_CLASS(nfs4_inode_event, __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) - __field(int, error) + __field(unsigned long, error) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->fileid = NFS_FILEID(inode); __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); - __entry->error = error; + __entry->error = error < 0 ? -error : 0; ), TP_printk( - "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x", - __entry->error, + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x", + -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, @@ -1238,7 +1238,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_event, __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) - __field(int, error) + __field(unsigned long, error) __field(int, stateid_seq) __field(u32, stateid_hash) ), @@ -1255,9 +1255,9 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_event, ), TP_printk( - "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "stateid=%d:0x%08x", - __entry->error, + -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, @@ -1295,7 +1295,7 @@ DECLARE_EVENT_CLASS(nfs4_getattr_event, __field(u32, fhandle) __field(u64, fileid) __field(unsigned int, valid) - __field(int, error) + __field(unsigned long, error) ), TP_fast_assign( @@ -1307,9 +1307,9 @@ DECLARE_EVENT_CLASS(nfs4_getattr_event, ), TP_printk( - "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "valid=%s", - __entry->error, + -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, @@ -1342,7 +1342,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event, TP_ARGS(clp, fhandle, inode, error), TP_STRUCT__entry( - __field(int, error) + __field(unsigned long, error) __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) @@ -1363,9 +1363,9 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event, ), TP_printk( - "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "dstaddr=%s", - __entry->error, + -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, @@ -1397,7 +1397,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event, TP_ARGS(clp, fhandle, inode, stateid, error), TP_STRUCT__entry( - __field(int, error) + __field(unsigned long, error) __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) @@ -1424,9 +1424,9 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event, ), TP_printk( - "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "stateid=%d:0x%08x dstaddr=%s", - __entry->error, + -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, @@ -1460,7 +1460,7 @@ DECLARE_EVENT_CLASS(nfs4_idmap_event, TP_ARGS(name, len, id, error), TP_STRUCT__entry( - __field(int, error) + __field(unsigned long, error) __field(u32, id) __dynamic_array(char, name, len > 0 ? len + 1 : 1) ), @@ -1475,8 +1475,8 @@ DECLARE_EVENT_CLASS(nfs4_idmap_event, ), TP_printk( - "error=%d id=%u name=%s", - __entry->error, + "error=%ld (%s) id=%u name=%s", + -__entry->error, show_nfsv4_errors(__entry->error), __entry->id, __get_str(name) ) @@ -1509,7 +1509,7 @@ DECLARE_EVENT_CLASS(nfs4_read_event, __field(u64, fileid) __field(loff_t, offset) __field(size_t, count) - __field(int, error) + __field(unsigned long, error) __field(int, stateid_seq) __field(u32, stateid_hash) ), @@ -1523,7 +1523,7 @@ DECLARE_EVENT_CLASS(nfs4_read_event, __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); __entry->offset = hdr->args.offset; __entry->count = hdr->args.count; - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->stateid_seq = be32_to_cpu(state->stateid.seqid); __entry->stateid_hash = @@ -1531,9 +1531,9 @@ DECLARE_EVENT_CLASS(nfs4_read_event, ), TP_printk( - "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "offset=%lld count=%zu stateid=%d:0x%08x", - __entry->error, + -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, @@ -1569,7 +1569,7 @@ DECLARE_EVENT_CLASS(nfs4_write_event, __field(u64, fileid) __field(loff_t, offset) __field(size_t, count) - __field(int, error) + __field(unsigned long, error) __field(int, stateid_seq) __field(u32, stateid_hash) ), @@ -1583,7 +1583,7 @@ DECLARE_EVENT_CLASS(nfs4_write_event, __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); __entry->offset = hdr->args.offset; __entry->count = hdr->args.count; - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->stateid_seq = be32_to_cpu(state->stateid.seqid); __entry->stateid_hash = @@ -1591,9 +1591,9 @@ DECLARE_EVENT_CLASS(nfs4_write_event, ), TP_printk( - "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "offset=%lld count=%zu stateid=%d:0x%08x", - __entry->error, + -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, @@ -1630,7 +1630,7 @@ DECLARE_EVENT_CLASS(nfs4_commit_event, __field(u64, fileid) __field(loff_t, offset) __field(size_t, count) - __field(int, error) + __field(unsigned long, error) ), TP_fast_assign( @@ -1644,9 +1644,9 @@ DECLARE_EVENT_CLASS(nfs4_commit_event, ), TP_printk( - "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "offset=%lld count=%zu", - __entry->error, + -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, @@ -1694,7 +1694,7 @@ TRACE_EVENT(nfs4_layoutget, __field(u32, iomode) __field(u64, offset) __field(u64, count) - __field(int, error) + __field(unsigned long, error) __field(int, stateid_seq) __field(u32, stateid_hash) __field(int, layoutstateid_seq) @@ -1727,10 +1727,10 @@ TRACE_EVENT(nfs4_layoutget, ), TP_printk( - "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "iomode=%s offset=%llu count=%llu stateid=%d:0x%08x " "layoutstateid=%d:0x%08x", - __entry->error, + -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, -- cgit v1.2.3-59-g8ed1b From 38a638a72a349c6afb48ca1612b4e58bbe168ff8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 19 Jun 2019 10:33:58 -0400 Subject: NFS: Display symbolic status code names in trace log For improved readability, add nfs_show_status() call-sites in the generic NFS trace points so that the symbolic status code name is displayed. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/nfstrace.h | 76 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 41 insertions(+), 35 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index a0d6910aa03a..9dd8765412fd 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -83,7 +83,7 @@ DECLARE_EVENT_CLASS(nfs_inode_event_done, TP_ARGS(inode, error), TP_STRUCT__entry( - __field(int, error) + __field(unsigned long, error) __field(dev_t, dev) __field(u32, fhandle) __field(unsigned char, type) @@ -96,7 +96,7 @@ DECLARE_EVENT_CLASS(nfs_inode_event_done, TP_fast_assign( const struct nfs_inode *nfsi = NFS_I(inode); - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); @@ -108,10 +108,10 @@ DECLARE_EVENT_CLASS(nfs_inode_event_done, ), TP_printk( - "error=%d fileid=%02x:%02x:%llu fhandle=0x%08x " + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "type=%u (%s) version=%llu size=%lld " "cache_validity=%lu (%s) nfs_flags=%ld (%s)", - __entry->error, + -__entry->error, nfs_show_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, @@ -219,7 +219,7 @@ DECLARE_EVENT_CLASS(nfs_lookup_event_done, TP_ARGS(dir, dentry, flags, error), TP_STRUCT__entry( - __field(int, error) + __field(unsigned long, error) __field(unsigned int, flags) __field(dev_t, dev) __field(u64, dir) @@ -229,14 +229,14 @@ DECLARE_EVENT_CLASS(nfs_lookup_event_done, TP_fast_assign( __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->flags = flags; __assign_str(name, dentry->d_name.name); ), TP_printk( - "error=%d flags=%u (%s) name=%02x:%02x:%llu/%s", - __entry->error, + "error=%ld (%s) flags=%u (%s) name=%02x:%02x:%llu/%s", + -__entry->error, nfs_show_status(__entry->error), __entry->flags, show_lookup_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), @@ -323,7 +323,7 @@ TRACE_EVENT(nfs_atomic_open_exit, TP_ARGS(dir, ctx, flags, error), TP_STRUCT__entry( - __field(int, error) + __field(unsigned long, error) __field(unsigned int, flags) __field(unsigned int, fmode) __field(dev_t, dev) @@ -332,7 +332,7 @@ TRACE_EVENT(nfs_atomic_open_exit, ), TP_fast_assign( - __entry->error = error; + __entry->error = -error; __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->flags = flags; @@ -341,9 +341,9 @@ TRACE_EVENT(nfs_atomic_open_exit, ), TP_printk( - "error=%d flags=%u (%s) fmode=%s " + "error=%ld (%s) flags=%u (%s) fmode=%s " "name=%02x:%02x:%llu/%s", - __entry->error, + -__entry->error, nfs_show_status(__entry->error), __entry->flags, show_open_flags(__entry->flags), show_fmode_flags(__entry->fmode), @@ -397,7 +397,7 @@ TRACE_EVENT(nfs_create_exit, TP_ARGS(dir, dentry, flags, error), TP_STRUCT__entry( - __field(int, error) + __field(unsigned long, error) __field(unsigned int, flags) __field(dev_t, dev) __field(u64, dir) @@ -405,7 +405,7 @@ TRACE_EVENT(nfs_create_exit, ), TP_fast_assign( - __entry->error = error; + __entry->error = -error; __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->flags = flags; @@ -413,8 +413,8 @@ TRACE_EVENT(nfs_create_exit, ), TP_printk( - "error=%d flags=%u (%s) name=%02x:%02x:%llu/%s", - __entry->error, + "error=%ld (%s) flags=%u (%s) name=%02x:%02x:%llu/%s", + -__entry->error, nfs_show_status(__entry->error), __entry->flags, show_open_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), @@ -469,7 +469,7 @@ DECLARE_EVENT_CLASS(nfs_directory_event_done, TP_ARGS(dir, dentry, error), TP_STRUCT__entry( - __field(int, error) + __field(unsigned long, error) __field(dev_t, dev) __field(u64, dir) __string(name, dentry->d_name.name) @@ -478,13 +478,13 @@ DECLARE_EVENT_CLASS(nfs_directory_event_done, TP_fast_assign( __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __assign_str(name, dentry->d_name.name); ), TP_printk( - "error=%d name=%02x:%02x:%llu/%s", - __entry->error, + "error=%ld (%s) name=%02x:%02x:%llu/%s", + -__entry->error, nfs_show_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) @@ -557,7 +557,7 @@ TRACE_EVENT(nfs_link_exit, TP_ARGS(inode, dir, dentry, error), TP_STRUCT__entry( - __field(int, error) + __field(unsigned long, error) __field(dev_t, dev) __field(u64, fileid) __field(u64, dir) @@ -568,13 +568,13 @@ TRACE_EVENT(nfs_link_exit, __entry->dev = inode->i_sb->s_dev; __entry->fileid = NFS_FILEID(inode); __entry->dir = NFS_FILEID(dir); - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __assign_str(name, dentry->d_name.name); ), TP_printk( - "error=%d fileid=%02x:%02x:%llu name=%02x:%02x:%llu/%s", - __entry->error, + "error=%ld (%s) fileid=%02x:%02x:%llu name=%02x:%02x:%llu/%s", + -__entry->error, nfs_show_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), __entry->fileid, MAJOR(__entry->dev), MINOR(__entry->dev), @@ -642,7 +642,7 @@ DECLARE_EVENT_CLASS(nfs_rename_event_done, TP_STRUCT__entry( __field(dev_t, dev) - __field(int, error) + __field(unsigned long, error) __field(u64, old_dir) __string(old_name, old_dentry->d_name.name) __field(u64, new_dir) @@ -651,17 +651,17 @@ DECLARE_EVENT_CLASS(nfs_rename_event_done, TP_fast_assign( __entry->dev = old_dir->i_sb->s_dev; + __entry->error = -error; __entry->old_dir = NFS_FILEID(old_dir); __entry->new_dir = NFS_FILEID(new_dir); - __entry->error = error; __assign_str(old_name, old_dentry->d_name.name); __assign_str(new_name, new_dentry->d_name.name); ), TP_printk( - "error=%d old_name=%02x:%02x:%llu/%s " + "error=%ld (%s) old_name=%02x:%02x:%llu/%s " "new_name=%02x:%02x:%llu/%s", - __entry->error, + -__entry->error, nfs_show_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->old_dir, __get_str(old_name), @@ -697,7 +697,7 @@ TRACE_EVENT(nfs_sillyrename_unlink, TP_STRUCT__entry( __field(dev_t, dev) - __field(int, error) + __field(unsigned long, error) __field(u64, dir) __dynamic_array(char, name, data->args.name.len + 1) ), @@ -707,15 +707,15 @@ TRACE_EVENT(nfs_sillyrename_unlink, size_t len = data->args.name.len; __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); - __entry->error = error; + __entry->error = -error; memcpy(__get_str(name), data->args.name.name, len); __get_str(name)[len] = 0; ), TP_printk( - "error=%d name=%02x:%02x:%llu/%s", - __entry->error, + "error=%ld (%s) name=%02x:%02x:%llu/%s", + -__entry->error, nfs_show_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, __get_str(name) @@ -974,6 +974,8 @@ TRACE_DEFINE_ENUM(NFSERR_PERM); TRACE_DEFINE_ENUM(NFSERR_NOENT); TRACE_DEFINE_ENUM(NFSERR_IO); TRACE_DEFINE_ENUM(NFSERR_NXIO); +TRACE_DEFINE_ENUM(ECHILD); +TRACE_DEFINE_ENUM(NFSERR_EAGAIN); TRACE_DEFINE_ENUM(NFSERR_ACCES); TRACE_DEFINE_ENUM(NFSERR_EXIST); TRACE_DEFINE_ENUM(NFSERR_XDEV); @@ -985,6 +987,7 @@ TRACE_DEFINE_ENUM(NFSERR_FBIG); TRACE_DEFINE_ENUM(NFSERR_NOSPC); TRACE_DEFINE_ENUM(NFSERR_ROFS); TRACE_DEFINE_ENUM(NFSERR_MLINK); +TRACE_DEFINE_ENUM(NFSERR_OPNOTSUPP); TRACE_DEFINE_ENUM(NFSERR_NAMETOOLONG); TRACE_DEFINE_ENUM(NFSERR_NOTEMPTY); TRACE_DEFINE_ENUM(NFSERR_DQUOT); @@ -1007,6 +1010,8 @@ TRACE_DEFINE_ENUM(NFSERR_JUKEBOX); { NFSERR_NOENT, "NOENT" }, \ { NFSERR_IO, "IO" }, \ { NFSERR_NXIO, "NXIO" }, \ + { ECHILD, "CHILD" }, \ + { NFSERR_EAGAIN, "AGAIN" }, \ { NFSERR_ACCES, "ACCES" }, \ { NFSERR_EXIST, "EXIST" }, \ { NFSERR_XDEV, "XDEV" }, \ @@ -1018,6 +1023,7 @@ TRACE_DEFINE_ENUM(NFSERR_JUKEBOX); { NFSERR_NOSPC, "NOSPC" }, \ { NFSERR_ROFS, "ROFS" }, \ { NFSERR_MLINK, "MLINK" }, \ + { NFSERR_OPNOTSUPP, "OPNOTSUPP" }, \ { NFSERR_NAMETOOLONG, "NAMETOOLONG" }, \ { NFSERR_NOTEMPTY, "NOTEMPTY" }, \ { NFSERR_DQUOT, "DQUOT" }, \ @@ -1041,7 +1047,7 @@ TRACE_EVENT(nfs_xdr_status, TP_ARGS(error), TP_STRUCT__entry( - __field(int, error) + __field(unsigned long, error) ), TP_fast_assign( @@ -1049,8 +1055,8 @@ TRACE_EVENT(nfs_xdr_status, ), TP_printk( - "error=%d (%s)", - __entry->error, nfs_show_status(__entry->error) + "error=%ld (%s)", + -__entry->error, nfs_show_status(__entry->error) ) ); -- cgit v1.2.3-59-g8ed1b From 7d4006c161ad8cded95f80f43b5fecc36e781497 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 19 Jun 2019 10:34:03 -0400 Subject: NFS: Update symbolic flags displayed by trace events Add missing symbolic flag names and display flags variables in hexadecimal to improve observability. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/nfstrace.h | 150 ++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 124 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 9dd8765412fd..cd09356e4772 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -11,6 +11,16 @@ #include #include +TRACE_DEFINE_ENUM(DT_UNKNOWN); +TRACE_DEFINE_ENUM(DT_FIFO); +TRACE_DEFINE_ENUM(DT_CHR); +TRACE_DEFINE_ENUM(DT_DIR); +TRACE_DEFINE_ENUM(DT_BLK); +TRACE_DEFINE_ENUM(DT_REG); +TRACE_DEFINE_ENUM(DT_LNK); +TRACE_DEFINE_ENUM(DT_SOCK); +TRACE_DEFINE_ENUM(DT_WHT); + #define nfs_show_file_type(ftype) \ __print_symbolic(ftype, \ { DT_UNKNOWN, "UNKNOWN" }, \ @@ -23,25 +33,57 @@ { DT_SOCK, "SOCK" }, \ { DT_WHT, "WHT" }) +TRACE_DEFINE_ENUM(NFS_INO_INVALID_DATA); +TRACE_DEFINE_ENUM(NFS_INO_INVALID_ATIME); +TRACE_DEFINE_ENUM(NFS_INO_INVALID_ACCESS); +TRACE_DEFINE_ENUM(NFS_INO_INVALID_ACL); +TRACE_DEFINE_ENUM(NFS_INO_REVAL_PAGECACHE); +TRACE_DEFINE_ENUM(NFS_INO_REVAL_FORCED); +TRACE_DEFINE_ENUM(NFS_INO_INVALID_LABEL); +TRACE_DEFINE_ENUM(NFS_INO_INVALID_CHANGE); +TRACE_DEFINE_ENUM(NFS_INO_INVALID_CTIME); +TRACE_DEFINE_ENUM(NFS_INO_INVALID_MTIME); +TRACE_DEFINE_ENUM(NFS_INO_INVALID_SIZE); +TRACE_DEFINE_ENUM(NFS_INO_INVALID_OTHER); + #define nfs_show_cache_validity(v) \ __print_flags(v, "|", \ - { NFS_INO_INVALID_ATTR, "INVALID_ATTR" }, \ { NFS_INO_INVALID_DATA, "INVALID_DATA" }, \ { NFS_INO_INVALID_ATIME, "INVALID_ATIME" }, \ { NFS_INO_INVALID_ACCESS, "INVALID_ACCESS" }, \ { NFS_INO_INVALID_ACL, "INVALID_ACL" }, \ { NFS_INO_REVAL_PAGECACHE, "REVAL_PAGECACHE" }, \ { NFS_INO_REVAL_FORCED, "REVAL_FORCED" }, \ - { NFS_INO_INVALID_LABEL, "INVALID_LABEL" }) + { NFS_INO_INVALID_LABEL, "INVALID_LABEL" }, \ + { NFS_INO_INVALID_CHANGE, "INVALID_CHANGE" }, \ + { NFS_INO_INVALID_CTIME, "INVALID_CTIME" }, \ + { NFS_INO_INVALID_MTIME, "INVALID_MTIME" }, \ + { NFS_INO_INVALID_SIZE, "INVALID_SIZE" }, \ + { NFS_INO_INVALID_OTHER, "INVALID_OTHER" }) + +TRACE_DEFINE_ENUM(NFS_INO_ADVISE_RDPLUS); +TRACE_DEFINE_ENUM(NFS_INO_STALE); +TRACE_DEFINE_ENUM(NFS_INO_ACL_LRU_SET); +TRACE_DEFINE_ENUM(NFS_INO_INVALIDATING); +TRACE_DEFINE_ENUM(NFS_INO_FSCACHE); +TRACE_DEFINE_ENUM(NFS_INO_FSCACHE_LOCK); +TRACE_DEFINE_ENUM(NFS_INO_LAYOUTCOMMIT); +TRACE_DEFINE_ENUM(NFS_INO_LAYOUTCOMMITTING); +TRACE_DEFINE_ENUM(NFS_INO_LAYOUTSTATS); +TRACE_DEFINE_ENUM(NFS_INO_ODIRECT); #define nfs_show_nfsi_flags(v) \ __print_flags(v, "|", \ - { 1 << NFS_INO_ADVISE_RDPLUS, "ADVISE_RDPLUS" }, \ - { 1 << NFS_INO_STALE, "STALE" }, \ - { 1 << NFS_INO_INVALIDATING, "INVALIDATING" }, \ - { 1 << NFS_INO_FSCACHE, "FSCACHE" }, \ - { 1 << NFS_INO_LAYOUTCOMMIT, "NEED_LAYOUTCOMMIT" }, \ - { 1 << NFS_INO_LAYOUTCOMMITTING, "LAYOUTCOMMIT" }) + { BIT(NFS_INO_ADVISE_RDPLUS), "ADVISE_RDPLUS" }, \ + { BIT(NFS_INO_STALE), "STALE" }, \ + { BIT(NFS_INO_ACL_LRU_SET), "ACL_LRU_SET" }, \ + { BIT(NFS_INO_INVALIDATING), "INVALIDATING" }, \ + { BIT(NFS_INO_FSCACHE), "FSCACHE" }, \ + { BIT(NFS_INO_FSCACHE_LOCK), "FSCACHE_LOCK" }, \ + { BIT(NFS_INO_LAYOUTCOMMIT), "NEED_LAYOUTCOMMIT" }, \ + { BIT(NFS_INO_LAYOUTCOMMITTING), "LAYOUTCOMMIT" }, \ + { BIT(NFS_INO_LAYOUTSTATS), "LAYOUTSTATS" }, \ + { BIT(NFS_INO_ODIRECT), "ODIRECT" }) DECLARE_EVENT_CLASS(nfs_inode_event, TP_PROTO( @@ -110,7 +152,7 @@ DECLARE_EVENT_CLASS(nfs_inode_event_done, TP_printk( "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " "type=%u (%s) version=%llu size=%lld " - "cache_validity=%lu (%s) nfs_flags=%ld (%s)", + "cache_validity=0x%lx (%s) nfs_flags=0x%lx (%s)", -__entry->error, nfs_show_status(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, @@ -158,13 +200,41 @@ DEFINE_NFS_INODE_EVENT_DONE(nfs_fsync_exit); DEFINE_NFS_INODE_EVENT(nfs_access_enter); DEFINE_NFS_INODE_EVENT_DONE(nfs_access_exit); +TRACE_DEFINE_ENUM(LOOKUP_FOLLOW); +TRACE_DEFINE_ENUM(LOOKUP_DIRECTORY); +TRACE_DEFINE_ENUM(LOOKUP_AUTOMOUNT); +TRACE_DEFINE_ENUM(LOOKUP_PARENT); +TRACE_DEFINE_ENUM(LOOKUP_REVAL); +TRACE_DEFINE_ENUM(LOOKUP_RCU); +TRACE_DEFINE_ENUM(LOOKUP_NO_REVAL); +TRACE_DEFINE_ENUM(LOOKUP_NO_EVAL); +TRACE_DEFINE_ENUM(LOOKUP_OPEN); +TRACE_DEFINE_ENUM(LOOKUP_CREATE); +TRACE_DEFINE_ENUM(LOOKUP_EXCL); +TRACE_DEFINE_ENUM(LOOKUP_RENAME_TARGET); +TRACE_DEFINE_ENUM(LOOKUP_JUMPED); +TRACE_DEFINE_ENUM(LOOKUP_ROOT); +TRACE_DEFINE_ENUM(LOOKUP_EMPTY); +TRACE_DEFINE_ENUM(LOOKUP_DOWN); + #define show_lookup_flags(flags) \ - __print_flags((unsigned long)flags, "|", \ - { LOOKUP_AUTOMOUNT, "AUTOMOUNT" }, \ + __print_flags(flags, "|", \ + { LOOKUP_FOLLOW, "FOLLOW" }, \ { LOOKUP_DIRECTORY, "DIRECTORY" }, \ + { LOOKUP_AUTOMOUNT, "AUTOMOUNT" }, \ + { LOOKUP_PARENT, "PARENT" }, \ + { LOOKUP_REVAL, "REVAL" }, \ + { LOOKUP_RCU, "RCU" }, \ + { LOOKUP_NO_REVAL, "NO_REVAL" }, \ + { LOOKUP_NO_EVAL, "NO_EVAL" }, \ { LOOKUP_OPEN, "OPEN" }, \ { LOOKUP_CREATE, "CREATE" }, \ - { LOOKUP_EXCL, "EXCL" }) + { LOOKUP_EXCL, "EXCL" }, \ + { LOOKUP_RENAME_TARGET, "RENAME_TARGET" }, \ + { LOOKUP_JUMPED, "JUMPED" }, \ + { LOOKUP_ROOT, "ROOT" }, \ + { LOOKUP_EMPTY, "EMPTY" }, \ + { LOOKUP_DOWN, "DOWN" }) DECLARE_EVENT_CLASS(nfs_lookup_event, TP_PROTO( @@ -176,7 +246,7 @@ DECLARE_EVENT_CLASS(nfs_lookup_event, TP_ARGS(dir, dentry, flags), TP_STRUCT__entry( - __field(unsigned int, flags) + __field(unsigned long, flags) __field(dev_t, dev) __field(u64, dir) __string(name, dentry->d_name.name) @@ -190,7 +260,7 @@ DECLARE_EVENT_CLASS(nfs_lookup_event, ), TP_printk( - "flags=%u (%s) name=%02x:%02x:%llu/%s", + "flags=0x%lx (%s) name=%02x:%02x:%llu/%s", __entry->flags, show_lookup_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), @@ -220,7 +290,7 @@ DECLARE_EVENT_CLASS(nfs_lookup_event_done, TP_STRUCT__entry( __field(unsigned long, error) - __field(unsigned int, flags) + __field(unsigned long, flags) __field(dev_t, dev) __field(u64, dir) __string(name, dentry->d_name.name) @@ -235,7 +305,7 @@ DECLARE_EVENT_CLASS(nfs_lookup_event_done, ), TP_printk( - "error=%ld (%s) flags=%u (%s) name=%02x:%02x:%llu/%s", + "error=%ld (%s) flags=0x%lx (%s) name=%02x:%02x:%llu/%s", -__entry->error, nfs_show_status(__entry->error), __entry->flags, show_lookup_flags(__entry->flags), @@ -260,15 +330,43 @@ DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_lookup_exit); DEFINE_NFS_LOOKUP_EVENT(nfs_lookup_revalidate_enter); DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_lookup_revalidate_exit); +TRACE_DEFINE_ENUM(O_WRONLY); +TRACE_DEFINE_ENUM(O_RDWR); +TRACE_DEFINE_ENUM(O_CREAT); +TRACE_DEFINE_ENUM(O_EXCL); +TRACE_DEFINE_ENUM(O_NOCTTY); +TRACE_DEFINE_ENUM(O_TRUNC); +TRACE_DEFINE_ENUM(O_APPEND); +TRACE_DEFINE_ENUM(O_NONBLOCK); +TRACE_DEFINE_ENUM(O_DSYNC); +TRACE_DEFINE_ENUM(O_DIRECT); +TRACE_DEFINE_ENUM(O_LARGEFILE); +TRACE_DEFINE_ENUM(O_DIRECTORY); +TRACE_DEFINE_ENUM(O_NOFOLLOW); +TRACE_DEFINE_ENUM(O_NOATIME); +TRACE_DEFINE_ENUM(O_CLOEXEC); + #define show_open_flags(flags) \ - __print_flags((unsigned long)flags, "|", \ + __print_flags(flags, "|", \ + { O_WRONLY, "O_WRONLY" }, \ + { O_RDWR, "O_RDWR" }, \ { O_CREAT, "O_CREAT" }, \ { O_EXCL, "O_EXCL" }, \ + { O_NOCTTY, "O_NOCTTY" }, \ { O_TRUNC, "O_TRUNC" }, \ { O_APPEND, "O_APPEND" }, \ + { O_NONBLOCK, "O_NONBLOCK" }, \ { O_DSYNC, "O_DSYNC" }, \ { O_DIRECT, "O_DIRECT" }, \ - { O_DIRECTORY, "O_DIRECTORY" }) + { O_LARGEFILE, "O_LARGEFILE" }, \ + { O_DIRECTORY, "O_DIRECTORY" }, \ + { O_NOFOLLOW, "O_NOFOLLOW" }, \ + { O_NOATIME, "O_NOATIME" }, \ + { O_CLOEXEC, "O_CLOEXEC" }) + +TRACE_DEFINE_ENUM(FMODE_READ); +TRACE_DEFINE_ENUM(FMODE_WRITE); +TRACE_DEFINE_ENUM(FMODE_EXEC); #define show_fmode_flags(mode) \ __print_flags(mode, "|", \ @@ -286,7 +384,7 @@ TRACE_EVENT(nfs_atomic_open_enter, TP_ARGS(dir, ctx, flags), TP_STRUCT__entry( - __field(unsigned int, flags) + __field(unsigned long, flags) __field(unsigned int, fmode) __field(dev_t, dev) __field(u64, dir) @@ -302,7 +400,7 @@ TRACE_EVENT(nfs_atomic_open_enter, ), TP_printk( - "flags=%u (%s) fmode=%s name=%02x:%02x:%llu/%s", + "flags=0x%lx (%s) fmode=%s name=%02x:%02x:%llu/%s", __entry->flags, show_open_flags(__entry->flags), show_fmode_flags(__entry->fmode), @@ -324,7 +422,7 @@ TRACE_EVENT(nfs_atomic_open_exit, TP_STRUCT__entry( __field(unsigned long, error) - __field(unsigned int, flags) + __field(unsigned long, flags) __field(unsigned int, fmode) __field(dev_t, dev) __field(u64, dir) @@ -341,7 +439,7 @@ TRACE_EVENT(nfs_atomic_open_exit, ), TP_printk( - "error=%ld (%s) flags=%u (%s) fmode=%s " + "error=%ld (%s) flags=0x%lx (%s) fmode=%s " "name=%02x:%02x:%llu/%s", -__entry->error, nfs_show_status(__entry->error), __entry->flags, @@ -363,7 +461,7 @@ TRACE_EVENT(nfs_create_enter, TP_ARGS(dir, dentry, flags), TP_STRUCT__entry( - __field(unsigned int, flags) + __field(unsigned long, flags) __field(dev_t, dev) __field(u64, dir) __string(name, dentry->d_name.name) @@ -377,7 +475,7 @@ TRACE_EVENT(nfs_create_enter, ), TP_printk( - "flags=%u (%s) name=%02x:%02x:%llu/%s", + "flags=0x%lx (%s) name=%02x:%02x:%llu/%s", __entry->flags, show_open_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), @@ -398,7 +496,7 @@ TRACE_EVENT(nfs_create_exit, TP_STRUCT__entry( __field(unsigned long, error) - __field(unsigned int, flags) + __field(unsigned long, flags) __field(dev_t, dev) __field(u64, dir) __string(name, dentry->d_name.name) @@ -413,7 +511,7 @@ TRACE_EVENT(nfs_create_exit, ), TP_printk( - "error=%ld (%s) flags=%u (%s) name=%02x:%02x:%llu/%s", + "error=%ld (%s) flags=0x%lx (%s) name=%02x:%02x:%llu/%s", -__entry->error, nfs_show_status(__entry->error), __entry->flags, show_open_flags(__entry->flags), -- cgit v1.2.3-59-g8ed1b From 62a92ba97a31c544802bbf13d3a998e86796d548 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 19 Jun 2019 10:34:09 -0400 Subject: NFS: Record task, client ID, and XID in xdr_status trace points When triggering an nfs_xdr_status trace point, record the task ID and XID of the failing RPC to better pinpoint the problem. This feels like a bit of a layering violation. Suggested-by: Trond Myklebust Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/nfs2xdr.c | 2 +- fs/nfs/nfs3xdr.c | 2 +- fs/nfs/nfs4trace.h | 15 +++++++++++++-- fs/nfs/nfs4xdr.c | 2 +- fs/nfs/nfstrace.h | 15 +++++++++++++-- 5 files changed, 29 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 572794dab4b1..cbc17a203248 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -151,7 +151,7 @@ static int decode_stat(struct xdr_stream *xdr, enum nfs_stat *status) return 0; out_status: *status = be32_to_cpup(p); - trace_nfs_xdr_status((int)*status); + trace_nfs_xdr_status(xdr, (int)*status); return 0; } diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index abbbdde97e31..602767850b36 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -343,7 +343,7 @@ static int decode_nfsstat3(struct xdr_stream *xdr, enum nfs_stat *status) return 0; out_status: *status = be32_to_cpup(p); - trace_nfs_xdr_status((int)*status); + trace_nfs_xdr_status(xdr, (int)*status); return 0; } diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 9a39fd51c8e7..d85f20945a2b 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -564,24 +564,35 @@ TRACE_EVENT(nfs4_setup_sequence, TRACE_EVENT(nfs4_xdr_status, TP_PROTO( + const struct xdr_stream *xdr, u32 op, int error ), - TP_ARGS(op, error), + TP_ARGS(xdr, op, error), TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(u32, xid) __field(u32, op) __field(unsigned long, error) ), TP_fast_assign( + const struct rpc_rqst *rqstp = xdr->rqst; + const struct rpc_task *task = rqstp->rq_task; + + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + __entry->xid = be32_to_cpu(rqstp->rq_xid); __entry->op = op; __entry->error = error; ), TP_printk( - "error=%ld (%s) operation %d:", + "task:%u@%d xid=0x%08x error=%ld (%s) operation=%u", + __entry->task_id, __entry->client_id, __entry->xid, -__entry->error, show_nfsv4_errors(__entry->error), __entry->op ) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 602446158bfb..d974ff3164ba 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3187,7 +3187,7 @@ static bool __decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected, return true; out_status: nfserr = be32_to_cpup(p); - trace_nfs4_xdr_status(opnum, nfserr); + trace_nfs4_xdr_status(xdr, opnum, nfserr); *nfs_retval = nfs4_stat_to_errno(nfserr); return true; out_bad_operation: diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index cd09356e4772..976d4089e267 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -1139,21 +1139,32 @@ TRACE_DEFINE_ENUM(NFSERR_JUKEBOX); TRACE_EVENT(nfs_xdr_status, TP_PROTO( + const struct xdr_stream *xdr, int error ), - TP_ARGS(error), + TP_ARGS(xdr, error), TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(u32, xid) __field(unsigned long, error) ), TP_fast_assign( + const struct rpc_rqst *rqstp = xdr->rqst; + const struct rpc_task *task = rqstp->rq_task; + + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client->cl_clid; + __entry->xid = be32_to_cpu(rqstp->rq_xid); __entry->error = error; ), TP_printk( - "error=%ld (%s)", + "task:%u@%d xid=0x%08x error=%ld (%s)", + __entry->task_id, __entry->client_id, __entry->xid, -__entry->error, nfs_show_status(__entry->error) ) ); -- cgit v1.2.3-59-g8ed1b From db531db951f950b86d274cc8ed7b21b9e2240036 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Fri, 12 Jul 2019 16:18:06 +0200 Subject: Revert "NFS: readdirplus optimization by cache mechanism" (memleak) This reverts commit be4c2d4723a4a637f0d1b4f7c66447141a4b3564. That commit caused a severe memory leak in nfs_readdir_make_qstr(). When listing a directory with more than 100 files (this is how many struct nfs_cache_array_entry elements fit in one 4kB page), all allocated file name strings past those 100 leak. The root of the leakage is that those string pointers are managed in pages which are never linked into the page cache. fs/nfs/dir.c puts pages into the page cache by calling read_cache_page(); the callback function nfs_readdir_filler() will then fill the given page struct which was passed to it, which is already linked in the page cache (by do_read_cache_page() calling add_to_page_cache_lru()). Commit be4c2d4723a4 added another (local) array of allocated pages, to be filled with more data, instead of discarding excess items received from the NFS server. Those additional pages can be used by the next nfs_readdir_filler() call (from within the same nfs_readdir() call). The leak happens when some of those additional pages are never used (copied to the page cache using copy_highpage()). The pages will be freed by nfs_readdir_free_pages(), but their contents will not. The commit did not invoke nfs_readdir_clear_array() (and doing so would have been dangerous, because it did not track which of those pages were already copied to the page cache, risking double free bugs). How to reproduce the leak: - Use a kernel with CONFIG_SLUB_DEBUG_ON. - Create a directory on a NFS mount with more than 100 files with names long enough to use the "kmalloc-32" slab (so we can easily look up the allocation counts): for i in `seq 110`; do touch ${i}_0123456789abcdef; done - Drop all caches: echo 3 >/proc/sys/vm/drop_caches - Check the allocation counter: grep nfs_readdir /sys/kernel/slab/kmalloc-32/alloc_calls 30564391 nfs_readdir_add_to_array+0x73/0xd0 age=534558/4791307/6540952 pid=370-1048386 cpus=0-47 nodes=0-1 - Request a directory listing and check the allocation counters again: ls [...] grep nfs_readdir /sys/kernel/slab/kmalloc-32/alloc_calls 30564511 nfs_readdir_add_to_array+0x73/0xd0 age=207/4792999/6542663 pid=370-1048386 cpus=0-47 nodes=0-1 There are now 120 new allocations. - Drop all caches and check the counters again: echo 3 >/proc/sys/vm/drop_caches grep nfs_readdir /sys/kernel/slab/kmalloc-32/alloc_calls 30564401 nfs_readdir_add_to_array+0x73/0xd0 age=735/4793524/6543176 pid=370-1048386 cpus=0-47 nodes=0-1 110 allocations are gone, but 10 have leaked and will never be freed. Unhelpfully, those allocations are explicitly excluded from KMEMLEAK, that's why my initial attempts with KMEMLEAK were not successful: /* * Avoid a kmemleak false positive. The pointer to the name is stored * in a page cache page which kmemleak does not scan. */ kmemleak_not_leak(string->name); It would be possible to solve this bug without reverting the whole commit: - keep track of which pages were not used, and call nfs_readdir_clear_array() on them, or - manually link those pages into the page cache But for now I have decided to just revert the commit, because the real fix would require complex considerations, risking more dangerous (crash) bugs, which may seem unsuitable for the stable branches. Signed-off-by: Max Kellermann Cc: stable@vger.kernel.org # v5.1+ Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 90 ++++--------------------------------------------------- fs/nfs/internal.h | 3 +- 2 files changed, 7 insertions(+), 86 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index bd1f9555447b..8d501093660f 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -144,19 +144,12 @@ struct nfs_cache_array { struct nfs_cache_array_entry array[0]; }; -struct readdirvec { - unsigned long nr; - unsigned long index; - struct page *pages[NFS_MAX_READDIR_RAPAGES]; -}; - typedef int (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, bool); typedef struct { struct file *file; struct page *page; struct dir_context *ctx; unsigned long page_index; - struct readdirvec pvec; u64 *dir_cookie; u64 last_cookie; loff_t current_index; @@ -536,10 +529,6 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en struct nfs_cache_array *array; unsigned int count = 0; int status; - int max_rapages = NFS_MAX_READDIR_RAPAGES; - - desc->pvec.index = desc->page_index; - desc->pvec.nr = 0; scratch = alloc_page(GFP_KERNEL); if (scratch == NULL) @@ -564,40 +553,20 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en if (desc->plus) nfs_prime_dcache(file_dentry(desc->file), entry); - status = nfs_readdir_add_to_array(entry, desc->pvec.pages[desc->pvec.nr]); - if (status == -ENOSPC) { - desc->pvec.nr++; - if (desc->pvec.nr == max_rapages) - break; - status = nfs_readdir_add_to_array(entry, desc->pvec.pages[desc->pvec.nr]); - } + status = nfs_readdir_add_to_array(entry, page); if (status != 0) break; } while (!entry->eof); - /* - * page and desc->pvec.pages[0] are valid, don't need to check - * whether or not to be NULL. - */ - copy_highpage(page, desc->pvec.pages[0]); - out_nopages: if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) { - array = kmap_atomic(desc->pvec.pages[desc->pvec.nr]); + array = kmap(page); array->eof_index = array->size; status = 0; - kunmap_atomic(array); + kunmap(page); } put_page(scratch); - - /* - * desc->pvec.nr > 0 means at least one page was completely filled, - * we should return -ENOSPC. Otherwise function - * nfs_readdir_xdr_to_array will enter infinite loop. - */ - if (desc->pvec.nr > 0) - return -ENOSPC; return status; } @@ -631,24 +600,6 @@ out_freepages: return -ENOMEM; } -/* - * nfs_readdir_rapages_init initialize rapages by nfs_cache_array structure. - */ -static -void nfs_readdir_rapages_init(nfs_readdir_descriptor_t *desc) -{ - struct nfs_cache_array *array; - int max_rapages = NFS_MAX_READDIR_RAPAGES; - int index; - - for (index = 0; index < max_rapages; index++) { - array = kmap_atomic(desc->pvec.pages[index]); - memset(array, 0, sizeof(struct nfs_cache_array)); - array->eof_index = -1; - kunmap_atomic(array); - } -} - static int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, struct inode *inode) { @@ -659,12 +610,6 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, int status = -ENOMEM; unsigned int array_size = ARRAY_SIZE(pages); - /* - * This means we hit readdir rdpages miss, the preallocated rdpages - * are useless, the preallocate rdpages should be reinitialized. - */ - nfs_readdir_rapages_init(desc); - entry.prev_cookie = 0; entry.cookie = desc->last_cookie; entry.eof = 0; @@ -725,24 +670,9 @@ int nfs_readdir_filler(void *data, struct page* page) struct inode *inode = file_inode(desc->file); int ret; - /* - * If desc->page_index in range desc->pvec.index and - * desc->pvec.index + desc->pvec.nr, we get readdir cache hit. - */ - if (desc->page_index >= desc->pvec.index && - desc->page_index < (desc->pvec.index + desc->pvec.nr)) { - /* - * page and desc->pvec.pages[x] are valid, don't need to check - * whether or not to be NULL. - */ - copy_highpage(page, desc->pvec.pages[desc->page_index - desc->pvec.index]); - ret = 0; - } else { - ret = nfs_readdir_xdr_to_array(desc, page, inode); - if (ret < 0) - goto error; - } - + ret = nfs_readdir_xdr_to_array(desc, page, inode); + if (ret < 0) + goto error; SetPageUptodate(page); if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) { @@ -907,7 +837,6 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) *desc = &my_desc; struct nfs_open_dir_context *dir_ctx = file->private_data; int res = 0; - int max_rapages = NFS_MAX_READDIR_RAPAGES; dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n", file, (long long)ctx->pos); @@ -927,12 +856,6 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) desc->decode = NFS_PROTO(inode)->decode_dirent; desc->plus = nfs_use_readdirplus(inode, ctx); - res = nfs_readdir_alloc_pages(desc->pvec.pages, max_rapages); - if (res < 0) - return -ENOMEM; - - nfs_readdir_rapages_init(desc); - if (ctx->pos == 0 || nfs_attribute_cache_expired(inode)) res = nfs_revalidate_mapping(inode, file->f_mapping); if (res < 0) @@ -968,7 +891,6 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) break; } while (!desc->eof); out: - nfs_readdir_free_pages(desc->pvec.pages, max_rapages); if (res > 0) res = 0; dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index f359e760ed41..a2346a2f8361 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -69,8 +69,7 @@ struct nfs_clone_mount { * Maximum number of pages that readdir can use for creating * a vmapped array of pages. */ -#define NFS_MAX_READDIR_PAGES 64 -#define NFS_MAX_READDIR_RAPAGES 8 +#define NFS_MAX_READDIR_PAGES 8 struct nfs_client_initdata { unsigned long init_flags; -- cgit v1.2.3-59-g8ed1b From 9bcaa35c686d65091e833f6c97a20baa31d0e870 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Wed, 3 Jul 2019 15:33:09 +0200 Subject: NFS: Use seq_putc() in nfs_show_stats() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A single character (line break) should be put into a sequence. Thus use the corresponding function “seq_putc”. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index d01def299d40..6f1749e70202 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -905,7 +905,7 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root) seq_printf(m, "%Lu ", totals.fscache[i]); } #endif - seq_printf(m, "\n"); + seq_putc(m, '\n'); rpc_clnt_show_stats(m, nfss->client); -- cgit v1.2.3-59-g8ed1b From 1c316e39a03af7b3e397f45d65fe00653996c467 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Wed, 3 Jul 2019 15:50:37 +0200 Subject: NFS: Replace 16 seq_printf() calls by seq_puts() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some strings should be put into a sequence. Thus use the corresponding function “seq_puts”. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 6f1749e70202..3683d2b1cc8e 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -587,7 +587,7 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss, } default: if (showdefaults) - seq_printf(m, ",mountaddr=unspecified"); + seq_puts(m, ",mountaddr=unspecified"); } if (nfss->mountd_version || showdefaults) @@ -697,29 +697,29 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, nfs_show_nfsv4_options(m, nfss, showdefaults); if (nfss->options & NFS_OPTION_FSCACHE) - seq_printf(m, ",fsc"); + seq_puts(m, ",fsc"); if (nfss->options & NFS_OPTION_MIGRATION) - seq_printf(m, ",migration"); + seq_puts(m, ",migration"); if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) { if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONE) - seq_printf(m, ",lookupcache=none"); + seq_puts(m, ",lookupcache=none"); else - seq_printf(m, ",lookupcache=pos"); + seq_puts(m, ",lookupcache=pos"); } local_flock = nfss->flags & NFS_MOUNT_LOCAL_FLOCK; local_fcntl = nfss->flags & NFS_MOUNT_LOCAL_FCNTL; if (!local_flock && !local_fcntl) - seq_printf(m, ",local_lock=none"); + seq_puts(m, ",local_lock=none"); else if (local_flock && local_fcntl) - seq_printf(m, ",local_lock=all"); + seq_puts(m, ",local_lock=all"); else if (local_flock) - seq_printf(m, ",local_lock=flock"); + seq_puts(m, ",local_lock=flock"); else - seq_printf(m, ",local_lock=posix"); + seq_puts(m, ",local_lock=posix"); } /* @@ -756,7 +756,7 @@ static void show_lease(struct seq_file *m, struct nfs_server *server) static void show_sessions(struct seq_file *m, struct nfs_server *server) { if (nfs4_has_session(server->nfs_client)) - seq_printf(m, ",sessions"); + seq_puts(m, ",sessions"); } #else static void show_sessions(struct seq_file *m, struct nfs_server *server) {} @@ -833,7 +833,7 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root) /* * Display all mount option settings */ - seq_printf(m, "\n\topts:\t"); + seq_puts(m, "\n\topts:\t"); seq_puts(m, sb_rdonly(root->d_sb) ? "ro" : "rw"); seq_puts(m, root->d_sb->s_flags & SB_SYNCHRONOUS ? ",sync" : ""); seq_puts(m, root->d_sb->s_flags & SB_NOATIME ? ",noatime" : ""); @@ -844,7 +844,7 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root) show_implementation_id(m, nfss); - seq_printf(m, "\n\tcaps:\t"); + seq_puts(m, "\n\tcaps:\t"); seq_printf(m, "caps=0x%x", nfss->caps); seq_printf(m, ",wtmult=%u", nfss->wtmult); seq_printf(m, ",dtsize=%u", nfss->dtsize); @@ -853,7 +853,7 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root) #if IS_ENABLED(CONFIG_NFS_V4) if (nfss->nfs_client->rpc_ops->version == 4) { - seq_printf(m, "\n\tnfsv4:\t"); + seq_puts(m, "\n\tnfsv4:\t"); seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); seq_printf(m, ",bm2=0x%x", nfss->attr_bitmask[2]); @@ -892,15 +892,15 @@ int nfs_show_stats(struct seq_file *m, struct dentry *root) preempt_enable(); } - seq_printf(m, "\n\tevents:\t"); + seq_puts(m, "\n\tevents:\t"); for (i = 0; i < __NFSIOS_COUNTSMAX; i++) seq_printf(m, "%lu ", totals.events[i]); - seq_printf(m, "\n\tbytes:\t"); + seq_puts(m, "\n\tbytes:\t"); for (i = 0; i < __NFSIOS_BYTESMAX; i++) seq_printf(m, "%Lu ", totals.bytes[i]); #ifdef CONFIG_NFS_FSCACHE if (nfss->options & NFS_OPTION_FSCACHE) { - seq_printf(m, "\n\tfsc:\t"); + seq_puts(m, "\n\tfsc:\t"); for (i = 0; i < __NFSIOS_FSCACHEMAX; i++) seq_printf(m, "%Lu ", totals.fscache[i]); } -- cgit v1.2.3-59-g8ed1b From 2eaf426debdce566df9302b218307483903ac534 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Sun, 7 Jul 2019 21:26:07 +0200 Subject: nfs: Fix copy-and-paste error in debug message The debug message of decode_attr_lease_time incorrectly says "file size". Fix it to "lease time". Signed-off-by: Donald Buczek Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index d974ff3164ba..620f8ff5684a 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3427,7 +3427,7 @@ static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint *res = be32_to_cpup(p); bitmap[0] &= ~FATTR4_WORD0_LEASE_TIME; } - dprintk("%s: file size=%u\n", __func__, (unsigned int)*res); + dprintk("%s: lease time=%u\n", __func__, (unsigned int)*res); return 0; } -- cgit v1.2.3-59-g8ed1b From 0efb01b2ac07976b350b1899333a961fd51d1fdc Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Sun, 7 Jul 2019 21:26:08 +0200 Subject: nfs4: Make nfs4_proc_get_lease_time available for nfs4.0 Compile nfs4_proc_get_lease_time, enc_get_lease_time and dec_get_lease_time for nfs4.0. Use nfs4_sequence_done instead of nfs41_sequence_done in nfs4_proc_get_lease_time, Signed-off-by: Donald Buczek Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 4 ++-- fs/nfs/nfs4proc.c | 6 +++++- fs/nfs/nfs4xdr.c | 12 +++++++++++- 3 files changed, 18 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 8a38a254f516..d778dad9a75e 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -312,12 +312,12 @@ extern int nfs4_set_rw_stateid(nfs4_stateid *stateid, const struct nfs_lock_context *l_ctx, fmode_t fmode); +extern int nfs4_proc_get_lease_time(struct nfs_client *clp, + struct nfs_fsinfo *fsinfo); #if defined(CONFIG_NFS_V4_1) extern int nfs41_sequence_done(struct rpc_task *, struct nfs4_sequence_res *); extern int nfs4_proc_create_session(struct nfs_client *, const struct cred *); extern int nfs4_proc_destroy_session(struct nfs4_session *, const struct cred *); -extern int nfs4_proc_get_lease_time(struct nfs_client *clp, - struct nfs_fsinfo *fsinfo); extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync); extern int nfs4_detect_session_trunking(struct nfs_client *clp, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d115d9973efc..ab362636443d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8255,6 +8255,8 @@ out: return ret; } +#endif /* CONFIG_NFS_V4_1 */ + struct nfs4_get_lease_time_data { struct nfs4_get_lease_time_args *args; struct nfs4_get_lease_time_res *res; @@ -8287,7 +8289,7 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata) (struct nfs4_get_lease_time_data *)calldata; dprintk("--> %s\n", __func__); - if (!nfs41_sequence_done(task, &data->res->lr_seq_res)) + if (!nfs4_sequence_done(task, &data->res->lr_seq_res)) return; switch (task->tk_status) { case -NFS4ERR_DELAY: @@ -8345,6 +8347,8 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) return status; } +#ifdef CONFIG_NFS_V4_1 + /* * Initialize the values to be used by the client in CREATE_SESSION * If nfs4_init_session set the fore channel request and response sizes, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 620f8ff5684a..46a8d636d151 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -837,6 +837,7 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, #define NFS4_dec_sequence_sz \ (compound_decode_hdr_maxsz + \ decode_sequence_maxsz) +#endif #define NFS4_enc_get_lease_time_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putrootfh_maxsz + \ @@ -845,6 +846,7 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, decode_sequence_maxsz + \ decode_putrootfh_maxsz + \ decode_fsinfo_maxsz) +#if defined(CONFIG_NFS_V4_1) #define NFS4_enc_reclaim_complete_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_reclaim_complete_maxsz) @@ -2957,6 +2959,8 @@ static void nfs4_xdr_enc_sequence(struct rpc_rqst *req, struct xdr_stream *xdr, encode_nops(&hdr); } +#endif + /* * a GET_LEASE_TIME request */ @@ -2977,6 +2981,8 @@ static void nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req, encode_nops(&hdr); } +#ifdef CONFIG_NFS_V4_1 + /* * a RECLAIM_COMPLETE request */ @@ -7122,6 +7128,8 @@ static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp, return status; } +#endif + /* * Decode GET_LEASE_TIME response */ @@ -7143,6 +7151,8 @@ static int nfs4_xdr_dec_get_lease_time(struct rpc_rqst *rqstp, return status; } +#ifdef CONFIG_NFS_V4_1 + /* * Decode RECLAIM_COMPLETE response */ @@ -7551,7 +7561,7 @@ const struct rpc_procinfo nfs4_procedures[] = { PROC41(CREATE_SESSION, enc_create_session, dec_create_session), PROC41(DESTROY_SESSION, enc_destroy_session, dec_destroy_session), PROC41(SEQUENCE, enc_sequence, dec_sequence), - PROC41(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time), + PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time), PROC41(RECLAIM_COMPLETE,enc_reclaim_complete, dec_reclaim_complete), PROC41(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo), PROC41(LAYOUTGET, enc_layoutget, dec_layoutget), -- cgit v1.2.3-59-g8ed1b From ea51efaa9617ee011f53bcd29752c51cda540e06 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Sun, 7 Jul 2019 21:26:09 +0200 Subject: nfs4: Rename nfs41_setup_state_renewal The function nfs41_setup_state_renewal is useful to the nfs 4.0 client as well, so rename the function to nfs4_setup_state_renewal. Signed-off-by: Donald Buczek Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index e2e3c4f04d3e..778ebfb00d13 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -286,7 +286,7 @@ static int nfs4_begin_drain_session(struct nfs_client *clp) #if defined(CONFIG_NFS_V4_1) -static int nfs41_setup_state_renewal(struct nfs_client *clp) +static int nfs4_setup_state_renewal(struct nfs_client *clp) { int status; struct nfs_fsinfo fsinfo; @@ -313,7 +313,7 @@ static void nfs41_finish_session_reset(struct nfs_client *clp) clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); /* create_session negotiated new slot table */ clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state); - nfs41_setup_state_renewal(clp); + nfs4_setup_state_renewal(clp); } int nfs41_init_clientid(struct nfs_client *clp, const struct cred *cred) -- cgit v1.2.3-59-g8ed1b From 5b596830d9710e3554354f717dc56f8640841c82 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Sun, 7 Jul 2019 21:26:10 +0200 Subject: nfs4.0: Refetch lease_time after clientid update RFC 7530 requires us to refetch the lease time attribute once a new clientID is established. This is already implemented for the nfs4.1(+) clients by nfs41_init_clientid, which calls nfs41_finish_session_reset, which calls nfs4_setup_state_renewal. To make nfs4_setup_state_renewal available for nfs4.0, move it further to the top of the source file to include it regardles of CONFIG_NFS_V4_1 and to save a forward declaration. Call nfs4_setup_state_renewal from nfs4_init_clientid. Signed-off-by: Donald Buczek Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 778ebfb00d13..f32b02c2bc73 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -87,6 +87,27 @@ const nfs4_stateid current_stateid = { static DEFINE_MUTEX(nfs_clid_init_mutex); +static int nfs4_setup_state_renewal(struct nfs_client *clp) +{ + int status; + struct nfs_fsinfo fsinfo; + unsigned long now; + + if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) { + nfs4_schedule_state_renewal(clp); + return 0; + } + + now = jiffies; + status = nfs4_proc_get_lease_time(clp, &fsinfo); + if (status == 0) { + nfs4_set_lease_period(clp, fsinfo.lease_time * HZ, now); + nfs4_schedule_state_renewal(clp); + } + + return status; +} + int nfs4_init_clientid(struct nfs_client *clp, const struct cred *cred) { struct nfs4_setclientid_res clid = { @@ -114,7 +135,7 @@ do_confirm: if (status != 0) goto out; clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); - nfs4_schedule_state_renewal(clp); + nfs4_setup_state_renewal(clp); out: return status; } @@ -286,27 +307,6 @@ static int nfs4_begin_drain_session(struct nfs_client *clp) #if defined(CONFIG_NFS_V4_1) -static int nfs4_setup_state_renewal(struct nfs_client *clp) -{ - int status; - struct nfs_fsinfo fsinfo; - unsigned long now; - - if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) { - nfs4_schedule_state_renewal(clp); - return 0; - } - - now = jiffies; - status = nfs4_proc_get_lease_time(clp, &fsinfo); - if (status == 0) { - nfs4_set_lease_period(clp, fsinfo.lease_time * HZ, now); - nfs4_schedule_state_renewal(clp); - } - - return status; -} - static void nfs41_finish_session_reset(struct nfs_client *clp) { clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); -- cgit v1.2.3-59-g8ed1b From 50c8000744463aa8534de0d739b50ed4f06f8275 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 11 Jul 2019 19:02:18 -0400 Subject: NFSv4: Validate the stateid before applying it to state recovery If the stateid is the zero or invalid stateid, then it is pointless to attempt to use it for recovery. In that case, try to fall back to using the open state stateid, or just doing a general recovery of all state on a given inode. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ab362636443d..52de7245a2ee 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -428,6 +428,22 @@ static int nfs4_delay(long *timeout, bool interruptible) return nfs4_delay_killable(timeout); } +static const nfs4_stateid * +nfs4_recoverable_stateid(const nfs4_stateid *stateid) +{ + if (!stateid) + return NULL; + switch (stateid->type) { + case NFS4_OPEN_STATEID_TYPE: + case NFS4_LOCK_STATEID_TYPE: + case NFS4_DELEGATION_STATEID_TYPE: + return stateid; + default: + break; + } + return NULL; +} + /* This is the error handling routine for processes that are allowed * to sleep. */ @@ -436,7 +452,7 @@ static int nfs4_do_handle_exception(struct nfs_server *server, { struct nfs_client *clp = server->nfs_client; struct nfs4_state *state = exception->state; - const nfs4_stateid *stateid = exception->stateid; + const nfs4_stateid *stateid; struct inode *inode = exception->inode; int ret = errorcode; @@ -444,8 +460,9 @@ static int nfs4_do_handle_exception(struct nfs_server *server, exception->recovering = 0; exception->retry = 0; + stateid = nfs4_recoverable_stateid(exception->stateid); if (stateid == NULL && state != NULL) - stateid = &state->stateid; + stateid = nfs4_recoverable_stateid(&state->stateid); switch(errorcode) { case 0: -- cgit v1.2.3-59-g8ed1b From 7402a4fedc2bc448100c2d086406c708451b16dc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 16 Jul 2019 13:51:29 -0400 Subject: SUNRPC: Fix up backchannel slot table accounting Add a per-transport maximum limit in the socket case, and add helpers to allow the NFSv4 code to discover that limit. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 3 +++ include/linux/sunrpc/bc_xprt.h | 1 + include/linux/sunrpc/clnt.h | 1 + include/linux/sunrpc/xprt.h | 6 ++++-- net/sunrpc/backchannel_rqst.c | 40 +++++++++++++++++++++------------------ net/sunrpc/clnt.c | 13 +++++++++++++ net/sunrpc/svc.c | 2 +- net/sunrpc/xprtrdma/backchannel.c | 7 +++++++ net/sunrpc/xprtrdma/transport.c | 1 + net/sunrpc/xprtrdma/xprt_rdma.h | 1 + net/sunrpc/xprtsock.c | 1 + 11 files changed, 55 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 52de7245a2ee..39896afc6edf 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8380,6 +8380,7 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args, { unsigned int max_rqst_sz, max_resp_sz; unsigned int max_bc_payload = rpc_max_bc_payload(clnt); + unsigned int max_bc_slots = rpc_num_bc_slots(clnt); max_rqst_sz = NFS_MAX_FILE_IO_SIZE + nfs41_maxwrite_overhead; max_resp_sz = NFS_MAX_FILE_IO_SIZE + nfs41_maxread_overhead; @@ -8402,6 +8403,8 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args, args->bc_attrs.max_resp_sz_cached = 0; args->bc_attrs.max_ops = NFS4_MAX_BACK_CHANNEL_OPS; args->bc_attrs.max_reqs = max_t(unsigned short, max_session_cb_slots, 1); + if (args->bc_attrs.max_reqs > max_bc_slots) + args->bc_attrs.max_reqs = max_bc_slots; dprintk("%s: Back Channel : max_rqst_sz=%u max_resp_sz=%u " "max_resp_sz_cached=%u max_ops=%u max_reqs=%u\n", diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index d4229a78524a..87d27e13d885 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -43,6 +43,7 @@ void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs); int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs); void xprt_destroy_bc(struct rpc_xprt *xprt, unsigned int max_reqs); void xprt_free_bc_rqst(struct rpc_rqst *req); +unsigned int xprt_bc_max_slots(struct rpc_xprt *xprt); /* * Determine if a shared backchannel is in use diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 4e070e00c143..abc63bd1be2b 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -194,6 +194,7 @@ void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int); struct net * rpc_net_ns(struct rpc_clnt *); size_t rpc_max_payload(struct rpc_clnt *); size_t rpc_max_bc_payload(struct rpc_clnt *); +unsigned int rpc_num_bc_slots(struct rpc_clnt *); void rpc_force_rebind(struct rpc_clnt *); size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); const char *rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index ed76e5fb36c1..13e108bcc9eb 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -158,6 +158,7 @@ struct rpc_xprt_ops { int (*bc_setup)(struct rpc_xprt *xprt, unsigned int min_reqs); size_t (*bc_maxpayload)(struct rpc_xprt *xprt); + unsigned int (*bc_num_slots)(struct rpc_xprt *xprt); void (*bc_free_rqst)(struct rpc_rqst *rqst); void (*bc_destroy)(struct rpc_xprt *xprt, unsigned int max_reqs); @@ -251,8 +252,9 @@ struct rpc_xprt { #if defined(CONFIG_SUNRPC_BACKCHANNEL) struct svc_serv *bc_serv; /* The RPC service which will */ /* process the callback */ - int bc_alloc_count; /* Total number of preallocs */ - atomic_t bc_free_slots; + unsigned int bc_alloc_max; + unsigned int bc_alloc_count; /* Total number of preallocs */ + atomic_t bc_slot_count; /* Number of allocated slots */ spinlock_t bc_pa_lock; /* Protects the preallocated * items */ struct list_head bc_pa_list; /* List of preallocated diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index c47d82622fd1..339e8c077c2d 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -31,25 +31,20 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define RPCDBG_FACILITY RPCDBG_TRANS #endif +#define BC_MAX_SLOTS 64U + +unsigned int xprt_bc_max_slots(struct rpc_xprt *xprt) +{ + return BC_MAX_SLOTS; +} + /* * Helper routines that track the number of preallocation elements * on the transport. */ static inline int xprt_need_to_requeue(struct rpc_xprt *xprt) { - return xprt->bc_alloc_count < atomic_read(&xprt->bc_free_slots); -} - -static inline void xprt_inc_alloc_count(struct rpc_xprt *xprt, unsigned int n) -{ - atomic_add(n, &xprt->bc_free_slots); - xprt->bc_alloc_count += n; -} - -static inline int xprt_dec_alloc_count(struct rpc_xprt *xprt, unsigned int n) -{ - atomic_sub(n, &xprt->bc_free_slots); - return xprt->bc_alloc_count -= n; + return xprt->bc_alloc_count < xprt->bc_alloc_max; } /* @@ -145,6 +140,9 @@ int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs) dprintk("RPC: setup backchannel transport\n"); + if (min_reqs > BC_MAX_SLOTS) + min_reqs = BC_MAX_SLOTS; + /* * We use a temporary list to keep track of the preallocated * buffers. Once we're done building the list we splice it @@ -172,7 +170,9 @@ int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs) */ spin_lock(&xprt->bc_pa_lock); list_splice(&tmp_list, &xprt->bc_pa_list); - xprt_inc_alloc_count(xprt, min_reqs); + xprt->bc_alloc_count += min_reqs; + xprt->bc_alloc_max += min_reqs; + atomic_add(min_reqs, &xprt->bc_slot_count); spin_unlock(&xprt->bc_pa_lock); dprintk("RPC: setup backchannel transport done\n"); @@ -220,11 +220,13 @@ void xprt_destroy_bc(struct rpc_xprt *xprt, unsigned int max_reqs) goto out; spin_lock_bh(&xprt->bc_pa_lock); - xprt_dec_alloc_count(xprt, max_reqs); + xprt->bc_alloc_max -= max_reqs; list_for_each_entry_safe(req, tmp, &xprt->bc_pa_list, rq_bc_pa_list) { dprintk("RPC: req=%p\n", req); list_del(&req->rq_bc_pa_list); xprt_free_allocation(req); + xprt->bc_alloc_count--; + atomic_dec(&xprt->bc_slot_count); if (--max_reqs == 0) break; } @@ -241,13 +243,14 @@ static struct rpc_rqst *xprt_get_bc_request(struct rpc_xprt *xprt, __be32 xid, struct rpc_rqst *req = NULL; dprintk("RPC: allocate a backchannel request\n"); - if (atomic_read(&xprt->bc_free_slots) <= 0) - goto not_found; if (list_empty(&xprt->bc_pa_list)) { if (!new) goto not_found; + if (atomic_read(&xprt->bc_slot_count) >= BC_MAX_SLOTS) + goto not_found; list_add_tail(&new->rq_bc_pa_list, &xprt->bc_pa_list); xprt->bc_alloc_count++; + atomic_inc(&xprt->bc_slot_count); } req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst, rq_bc_pa_list); @@ -291,6 +294,7 @@ void xprt_free_bc_rqst(struct rpc_rqst *req) if (xprt_need_to_requeue(xprt)) { list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list); xprt->bc_alloc_count++; + atomic_inc(&xprt->bc_slot_count); req = NULL; } spin_unlock_bh(&xprt->bc_pa_lock); @@ -357,7 +361,7 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied) spin_lock(&xprt->bc_pa_lock); list_del(&req->rq_bc_pa_list); - xprt_dec_alloc_count(xprt, 1); + xprt->bc_alloc_count--; spin_unlock(&xprt->bc_pa_lock); req->rq_private_buf.len = copied; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 383555d2b522..79c849391cb9 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1526,6 +1526,19 @@ size_t rpc_max_bc_payload(struct rpc_clnt *clnt) } EXPORT_SYMBOL_GPL(rpc_max_bc_payload); +unsigned int rpc_num_bc_slots(struct rpc_clnt *clnt) +{ + struct rpc_xprt *xprt; + unsigned int ret; + + rcu_read_lock(); + xprt = rcu_dereference(clnt->cl_xprt); + ret = xprt->ops->bc_num_slots(xprt); + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL_GPL(rpc_num_bc_slots); + /** * rpc_force_rebind - force transport to check that remote port is unchanged * @clnt: client to rebind diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index e15cb704453e..220b79988000 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1595,7 +1595,7 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, /* Parse and execute the bc call */ proc_error = svc_process_common(rqstp, argv, resv); - atomic_inc(&req->rq_xprt->bc_free_slots); + atomic_dec(&req->rq_xprt->bc_slot_count); if (!proc_error) { /* Processing error: drop the request */ xprt_free_bc_request(req); diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index ce986591f213..59e624b1d7a0 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -52,6 +52,13 @@ size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt) return maxmsg - RPCRDMA_HDRLEN_MIN; } +unsigned int xprt_rdma_bc_max_slots(struct rpc_xprt *xprt) +{ + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + + return r_xprt->rx_buf.rb_bc_srv_max_requests; +} + static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 4993aa49ecbe..52abddac19e5 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -812,6 +812,7 @@ static const struct rpc_xprt_ops xprt_rdma_procs = { #if defined(CONFIG_SUNRPC_BACKCHANNEL) .bc_setup = xprt_rdma_bc_setup, .bc_maxpayload = xprt_rdma_bc_maxpayload, + .bc_num_slots = xprt_rdma_bc_max_slots, .bc_free_rqst = xprt_rdma_bc_free_rqst, .bc_destroy = xprt_rdma_bc_destroy, #endif diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 8378f45d2da7..92ce09fcea74 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -605,6 +605,7 @@ void xprt_rdma_cleanup(void); #if defined(CONFIG_SUNRPC_BACKCHANNEL) int xprt_rdma_bc_setup(struct rpc_xprt *, unsigned int); size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *); +unsigned int xprt_rdma_bc_max_slots(struct rpc_xprt *); int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int); void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *); int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 3c2cc96afcaa..6b1fca51028a 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2788,6 +2788,7 @@ static const struct rpc_xprt_ops xs_tcp_ops = { #ifdef CONFIG_SUNRPC_BACKCHANNEL .bc_setup = xprt_setup_bc, .bc_maxpayload = xs_tcp_bc_maxpayload, + .bc_num_slots = xprt_bc_max_slots, .bc_free_rqst = xprt_free_bc_rqst, .bc_destroy = xprt_destroy_bc, #endif -- cgit v1.2.3-59-g8ed1b From d9aba2b40de6fddd83f2fe3a5ac2bcd2c98fa66b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 16 Jul 2019 15:38:28 -0400 Subject: NFSv4: Don't use the zero stateid with layoutget The NFSv4.1 protocol explicitly forbids us from using the zero stateid together with layoutget, so when we see that nfs4_select_rw_stateid() is unable to return a valid delegation, lock or open stateid, then we should initiate recovery and retry. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 3 +-- fs/nfs/pnfs.c | 14 +++++++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index f32b02c2bc73..9afd051a4876 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1064,8 +1064,7 @@ int nfs4_select_rw_stateid(struct nfs4_state *state, * choose to use. */ goto out; - nfs4_copy_open_stateid(dst, state); - ret = 0; + ret = nfs4_copy_open_stateid(dst, state) ? 0 : -EAGAIN; out: if (nfs_server_capable(state->inode, NFS_CAP_STATEID_NFSV41)) dst->seqid = 0; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 56e423cd8180..5b9145c62fd9 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1915,6 +1915,7 @@ lookup_again: * stateid. */ if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) { + int status; /* * The first layoutget for the file. Need to serialize per @@ -1934,13 +1935,20 @@ lookup_again: } first = true; - if (nfs4_select_rw_stateid(ctx->state, + status = nfs4_select_rw_stateid(ctx->state, iomode == IOMODE_RW ? FMODE_WRITE : FMODE_READ, - NULL, &stateid, NULL) != 0) { + NULL, &stateid, NULL); + if (status != 0) { trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_INVALID_OPEN); - goto out_unlock; + if (status != -EAGAIN) + goto out_unlock; + spin_unlock(&ino->i_lock); + nfs4_schedule_stateid_recovery(server, ctx->state); + pnfs_clear_first_layoutget(lo); + pnfs_put_layout_hdr(lo); + goto lookup_again; } } else { nfs4_stateid_copy(&stateid, &lo->plh_stateid); -- cgit v1.2.3-59-g8ed1b From 8e04fdfadda75a849c649f7e50fe7d97772e1fcb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 17 Jul 2019 13:57:44 -0400 Subject: pnfs/flexfiles: Fix PTR_ERR() dereferences in ff_layout_track_ds_error mirror->mirror_ds can be NULL if uninitialised, but can contain a PTR_ERR() if call to GETDEVICEINFO failed. Fixes: 65990d1afbd2 ("pNFS/flexfiles: Fix a deadlock on LAYOUTGET") Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # 4.10+ --- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 19f856f45689..3eda40a320a5 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -257,7 +257,7 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo, if (status == 0) return 0; - if (mirror->mirror_ds == NULL) + if (IS_ERR_OR_NULL(mirror->mirror_ds)) return -EINVAL; dserr = kmalloc(sizeof(*dserr), gfp_flags); -- cgit v1.2.3-59-g8ed1b From 58bbeab425c6c5e318f5b6ae31d351331ddfb34b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 18 Jul 2019 15:33:42 -0400 Subject: pnfs: Fix a problem where we gratuitously start doing I/O through the MDS If the client has to stop in pnfs_update_layout() to wait for another layoutget to complete, it currently exits and defaults to I/O through the MDS if the layoutget was successful. Fixes: d03360aaf5cc ("pNFS: Ensure we return the error if someone kills...") Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # v4.20+ --- fs/nfs/pnfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 5b9145c62fd9..758917463700 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1890,7 +1890,7 @@ lookup_again: spin_unlock(&ino->i_lock); lseg = ERR_PTR(wait_var_event_killable(&lo->plh_outstanding, !atomic_read(&lo->plh_outstanding))); - if (IS_ERR(lseg) || !list_empty(&lo->plh_segs)) + if (IS_ERR(lseg)) goto out_put_layout_hdr; pnfs_put_layout_hdr(lo); goto lookup_again; -- cgit v1.2.3-59-g8ed1b From d5b9216fd5114be4ed98ca9c1ecc5f164cd8cf5e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 18 Jul 2019 09:32:17 -0400 Subject: pnfs/flexfiles: Add tracepoints for detecting pnfs fallback to MDS Add tracepoints to allow debugging of the event chain leading to a pnfs fallback to doing I/O through the MDS. Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 26 ++++++++++++ fs/nfs/nfs4trace.c | 8 ++++ fs/nfs/nfs4trace.h | 76 +++++++++++++++++++++++++++++++++- fs/nfs/pnfs.c | 2 + include/linux/nfs4.h | 1 + 5 files changed, 112 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index bcff3bf5ae09..b04e20d28162 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -934,6 +934,10 @@ out_nolseg: if (pgio->pg_error < 0) return; out_mds: + trace_pnfs_mds_fallback_pg_init_read(pgio->pg_inode, + 0, NFS4_MAX_UINT64, IOMODE_READ, + NFS_I(pgio->pg_inode)->layout, + pgio->pg_lseg); pnfs_put_lseg(pgio->pg_lseg); pgio->pg_lseg = NULL; nfs_pageio_reset_read_mds(pgio); @@ -1000,6 +1004,10 @@ retry: return; out_mds: + trace_pnfs_mds_fallback_pg_init_write(pgio->pg_inode, + 0, NFS4_MAX_UINT64, IOMODE_RW, + NFS_I(pgio->pg_inode)->layout, + pgio->pg_lseg); pnfs_put_lseg(pgio->pg_lseg); pgio->pg_lseg = NULL; nfs_pageio_reset_write_mds(pgio); @@ -1026,6 +1034,10 @@ ff_layout_pg_get_mirror_count_write(struct nfs_pageio_descriptor *pgio, if (pgio->pg_lseg) return FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg); + trace_pnfs_mds_fallback_pg_get_mirror_count(pgio->pg_inode, + 0, NFS4_MAX_UINT64, IOMODE_RW, + NFS_I(pgio->pg_inode)->layout, + pgio->pg_lseg); /* no lseg means that pnfs is not in use, so no mirroring here */ nfs_pageio_reset_write_mds(pgio); out: @@ -1075,6 +1087,10 @@ static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs) hdr->args.count, (unsigned long long)hdr->args.offset); + trace_pnfs_mds_fallback_write_done(hdr->inode, + hdr->args.offset, hdr->args.count, + IOMODE_RW, NFS_I(hdr->inode)->layout, + hdr->lseg); task->tk_status = pnfs_write_done_resend_to_mds(hdr); } } @@ -1094,6 +1110,10 @@ static void ff_layout_reset_read(struct nfs_pgio_header *hdr) hdr->args.count, (unsigned long long)hdr->args.offset); + trace_pnfs_mds_fallback_read_done(hdr->inode, + hdr->args.offset, hdr->args.count, + IOMODE_READ, NFS_I(hdr->inode)->layout, + hdr->lseg); task->tk_status = pnfs_read_done_resend_to_mds(hdr); } } @@ -1827,6 +1847,9 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) out_failed: if (ff_layout_avoid_mds_available_ds(lseg)) return PNFS_TRY_AGAIN; + trace_pnfs_mds_fallback_read_pagelist(hdr->inode, + hdr->args.offset, hdr->args.count, + IOMODE_READ, NFS_I(hdr->inode)->layout, lseg); return PNFS_NOT_ATTEMPTED; } @@ -1892,6 +1915,9 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) out_failed: if (ff_layout_avoid_mds_available_ds(lseg)) return PNFS_TRY_AGAIN; + trace_pnfs_mds_fallback_write_pagelist(hdr->inode, + hdr->args.offset, hdr->args.count, + IOMODE_RW, NFS_I(hdr->inode)->layout, lseg); return PNFS_NOT_ATTEMPTED; } diff --git a/fs/nfs/nfs4trace.c b/fs/nfs/nfs4trace.c index e9fb3e50a999..1a8f376b3f73 100644 --- a/fs/nfs/nfs4trace.c +++ b/fs/nfs/nfs4trace.c @@ -16,4 +16,12 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(nfs4_pnfs_read); EXPORT_TRACEPOINT_SYMBOL_GPL(nfs4_pnfs_write); EXPORT_TRACEPOINT_SYMBOL_GPL(nfs4_pnfs_commit_ds); + +EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_pg_init_read); +EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_pg_init_write); +EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_pg_get_mirror_count); +EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_read_done); +EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_write_done); +EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_read_pagelist); +EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_write_pagelist); #endif diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index d85f20945a2b..b2f395fa7350 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -1771,6 +1771,7 @@ TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_BLOCKED); TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_INVALID_OPEN); TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_RETRY); TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET); +TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_EXIT); #define show_pnfs_update_layout_reason(reason) \ __print_symbolic(reason, \ @@ -1786,7 +1787,8 @@ TRACE_DEFINE_ENUM(PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET); { PNFS_UPDATE_LAYOUT_BLOCKED, "layouts blocked" }, \ { PNFS_UPDATE_LAYOUT_INVALID_OPEN, "invalid open" }, \ { PNFS_UPDATE_LAYOUT_RETRY, "retrying" }, \ - { PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET, "sent layoutget" }) + { PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET, "sent layoutget" }, \ + { PNFS_UPDATE_LAYOUT_EXIT, "exit" }) TRACE_EVENT(pnfs_update_layout, TP_PROTO(struct inode *inode, @@ -1845,6 +1847,78 @@ TRACE_EVENT(pnfs_update_layout, ) ); +DECLARE_EVENT_CLASS(pnfs_layout_event, + TP_PROTO(struct inode *inode, + loff_t pos, + u64 count, + enum pnfs_iomode iomode, + struct pnfs_layout_hdr *lo, + struct pnfs_layout_segment *lseg + ), + TP_ARGS(inode, pos, count, iomode, lo, lseg), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u64, fileid) + __field(u32, fhandle) + __field(loff_t, pos) + __field(u64, count) + __field(enum pnfs_iomode, iomode) + __field(int, layoutstateid_seq) + __field(u32, layoutstateid_hash) + __field(long, lseg) + ), + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = NFS_FILEID(inode); + __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); + __entry->pos = pos; + __entry->count = count; + __entry->iomode = iomode; + if (lo != NULL) { + __entry->layoutstateid_seq = + be32_to_cpu(lo->plh_stateid.seqid); + __entry->layoutstateid_hash = + nfs_stateid_hash(&lo->plh_stateid); + } else { + __entry->layoutstateid_seq = 0; + __entry->layoutstateid_hash = 0; + } + __entry->lseg = (long)lseg; + ), + TP_printk( + "fileid=%02x:%02x:%llu fhandle=0x%08x " + "iomode=%s pos=%llu count=%llu " + "layoutstateid=%d:0x%08x lseg=0x%lx", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + show_pnfs_iomode(__entry->iomode), + (unsigned long long)__entry->pos, + (unsigned long long)__entry->count, + __entry->layoutstateid_seq, __entry->layoutstateid_hash, + __entry->lseg + ) +); + +#define DEFINE_PNFS_LAYOUT_EVENT(name) \ + DEFINE_EVENT(pnfs_layout_event, name, \ + TP_PROTO(struct inode *inode, \ + loff_t pos, \ + u64 count, \ + enum pnfs_iomode iomode, \ + struct pnfs_layout_hdr *lo, \ + struct pnfs_layout_segment *lseg \ + ), \ + TP_ARGS(inode, pos, count, iomode, lo, lseg)) + +DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_pg_init_read); +DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_pg_init_write); +DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_pg_get_mirror_count); +DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_read_done); +DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_write_done); +DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_read_pagelist); +DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_write_pagelist); + #endif /* CONFIG_NFS_V4_1 */ #endif /* _TRACE_NFS4_H */ diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 758917463700..75bd5b552ba4 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2037,6 +2037,8 @@ lookup_again: out_put_layout_hdr: if (first) pnfs_clear_first_layoutget(lo); + trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, + PNFS_UPDATE_LAYOUT_EXIT); pnfs_put_layout_hdr(lo); out: dprintk("%s: inode %s/%llu pNFS layout segment %s for " diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 22494d170619..fd59904a282c 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -660,6 +660,7 @@ enum pnfs_update_layout_reason { PNFS_UPDATE_LAYOUT_BLOCKED, PNFS_UPDATE_LAYOUT_INVALID_OPEN, PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET, + PNFS_UPDATE_LAYOUT_EXIT, }; #define NFS4_OP_MAP_NUM_LONGS \ -- cgit v1.2.3-59-g8ed1b