about summary refs log tree commit diff stats
path: root/fs/nfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfs')
-rw-r--r-- fs/nfs/Makefile 2
-rw-r--r-- fs/nfs/blocklayout/blocklayout.c 37
-rw-r--r-- fs/nfs/blocklayout/dev.c 42
-rw-r--r-- fs/nfs/blocklayout/rpc_pipefs.c 1
-rw-r--r-- fs/nfs/callback.c 96
-rw-r--r-- fs/nfs/callback.h 2
-rw-r--r-- fs/nfs/callback_proc.c 32
-rw-r--r-- fs/nfs/callback_xdr.c 23
-rw-r--r-- fs/nfs/client.c 31
-rw-r--r-- fs/nfs/delegation.c 38
-rw-r--r-- fs/nfs/dir.c 892
-rw-r--r-- fs/nfs/direct.c 127
-rw-r--r-- fs/nfs/dns_resolve.c 7
-rw-r--r-- fs/nfs/dns_resolve.h 2
-rw-r--r-- fs/nfs/export.c 2
-rw-r--r-- fs/nfs/file.c 216
-rw-r--r-- fs/nfs/filelayout/filelayout.c 11
-rw-r--r-- fs/nfs/filelayout/filelayout.h 2
-rw-r--r-- fs/nfs/filelayout/filelayoutdev.c 4
-rw-r--r-- fs/nfs/flexfilelayout/flexfilelayout.c 166
-rw-r--r-- fs/nfs/flexfilelayout/flexfilelayoutdev.c 6
-rw-r--r-- fs/nfs/fs_context.c 51
-rw-r--r-- fs/nfs/fscache-index.c 140
-rw-r--r-- fs/nfs/fscache.c 508
-rw-r--r-- fs/nfs/fscache.h 191
-rw-r--r-- fs/nfs/inode.c 128
-rw-r--r-- fs/nfs/internal.h 137
-rw-r--r-- fs/nfs/mount_clnt.c 4
-rw-r--r-- fs/nfs/namespace.c 2
-rw-r--r-- fs/nfs/nfs2xdr.c 3
-rw-r--r-- fs/nfs/nfs3client.c 5
-rw-r--r-- fs/nfs/nfs3proc.c 8
-rw-r--r-- fs/nfs/nfs3xdr.c 30
-rw-r--r-- fs/nfs/nfs42proc.c 65
-rw-r--r-- fs/nfs/nfs42xattr.c 18
-rw-r--r-- fs/nfs/nfs42xdr.c 178
-rw-r--r-- fs/nfs/nfs4_fs.h 16
-rw-r--r-- fs/nfs/nfs4client.c 30
-rw-r--r-- fs/nfs/nfs4file.c 25
-rw-r--r-- fs/nfs/nfs4idmap.c 48
-rw-r--r-- fs/nfs/nfs4namespace.c 38
-rw-r--r-- fs/nfs/nfs4proc.c 516
-rw-r--r-- fs/nfs/nfs4state.c 107
-rw-r--r-- fs/nfs/nfs4trace.h 50
-rw-r--r-- fs/nfs/nfs4xdr.c 155
-rw-r--r-- fs/nfs/nfsroot.c 2
-rw-r--r-- fs/nfs/nfstrace.h 437
-rw-r--r-- fs/nfs/pagelist.c 14
-rw-r--r-- fs/nfs/pnfs.c 87
-rw-r--r-- fs/nfs/pnfs.h 12
-rw-r--r-- fs/nfs/pnfs_nfs.c 18
-rw-r--r-- fs/nfs/proc.c 1
-rw-r--r-- fs/nfs/read.c 57
-rw-r--r-- fs/nfs/super.c 62
-rw-r--r-- fs/nfs/symlink.c 16
-rw-r--r-- fs/nfs/sysfs.c 3
-rw-r--r-- fs/nfs/unlink.c 9
-rw-r--r-- fs/nfs/write.c 228
58 files changed, 3005 insertions, 2133 deletions
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 22d11fdc6deb..5f6db37f461e 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -12,7 +12,7 @@ nfs-y := client.o dir.o file.o getroot.o inode.o super.o \
export.o sysfs.o fs_context.o
nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
nfs-$(CONFIG_SYSCTL) += sysctl.o
-nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o
+nfs-$(CONFIG_NFS_FSCACHE) += fscache.o
obj-$(CONFIG_NFS_V2) += nfsv2.o
nfsv2-y := nfs2super.o proc.o nfs2xdr.o
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index fe860c538747..943aeea1eb16 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -115,30 +115,13 @@ bl_submit_bio(struct bio *bio)
return NULL;
}
-static struct bio *bl_alloc_init_bio(unsigned int npg,
- struct block_device *bdev, sector_t disk_sector,
- bio_end_io_t end_io, struct parallel_io *par)
-{
- struct bio *bio;
-
- npg = bio_max_segs(npg);
- bio = bio_alloc(GFP_NOIO, npg);
- if (bio) {
- bio->bi_iter.bi_sector = disk_sector;
- bio_set_dev(bio, bdev);
- bio->bi_end_io = end_io;
- bio->bi_private = par;
- }
- return bio;
-}
-
static bool offset_in_map(u64 offset, struct pnfs_block_dev_map *map)
{
return offset >= map->start && offset < map->start + map->len;
}
static struct bio *
-do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect,
+do_add_page_to_bio(struct bio *bio, int npg, enum req_op op, sector_t isect,
struct page *page, struct pnfs_block_dev_map *map,
struct pnfs_block_extent *be, bio_end_io_t end_io,
struct parallel_io *par, unsigned int offset, int *len)
@@ -148,7 +131,7 @@ do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect,
u64 disk_addr, end;
dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__,
- npg, rw, (unsigned long long)isect, offset, *len);
+ npg, (__force u32)op, (unsigned long long)isect, offset, *len);
/* translate to device offset */
isect += be->be_v_offset;
@@ -171,11 +154,10 @@ do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect,
retry:
if (!bio) {
- bio = bl_alloc_init_bio(npg, map->bdev,
- disk_addr >> SECTOR_SHIFT, end_io, par);
- if (!bio)
- return ERR_PTR(-ENOMEM);
- bio_set_op_attrs(bio, rw, 0);
+ bio = bio_alloc(map->bdev, bio_max_segs(npg), op, GFP_NOIO);
+ bio->bi_iter.bi_sector = disk_addr >> SECTOR_SHIFT;
+ bio->bi_end_io = end_io;
+ bio->bi_private = par;
}
if (bio_add_page(bio, page, *len, offset) < *len) {
bio = bl_submit_bio(bio);
@@ -309,7 +291,7 @@ bl_read_pagelist(struct nfs_pgio_header *header)
} else {
bio = do_add_page_to_bio(bio,
header->page_array.npages - i,
- READ,
+ REQ_OP_READ,
isect, pages[i], &map, &be,
bl_end_io_read, par,
pg_offset, &pg_len);
@@ -438,9 +420,8 @@ bl_write_pagelist(struct nfs_pgio_header *header, int sync)
pg_len = PAGE_SIZE;
bio = do_add_page_to_bio(bio, header->page_array.npages - i,
- WRITE, isect, pages[i], &map, &be,
- bl_end_io_write, par,
- 0, &pg_len);
+ REQ_OP_WRITE, isect, pages[i], &map,
+ &be, bl_end_io_write, par, 0, &pg_len);
if (IS_ERR(bio)) {
header->pnfs_error = PTR_ERR(bio);
bio = NULL;
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index 5e56da748b2a..fea5f8821da5 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -301,18 +301,14 @@ bl_validate_designator(struct pnfs_block_volume *v)
}
}
-/*
- * Try to open the udev path for the WWN. At least on Debian the udev
- * by-id path will always point to the dm-multipath device if one exists.
- */
static struct block_device *
-bl_open_udev_path(struct pnfs_block_volume *v)
+bl_open_path(struct pnfs_block_volume *v, const char *prefix)
{
struct block_device *bdev;
const char *devname;
- devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%*phN",
- v->scsi.designator_len, v->scsi.designator);
+ devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/%s%*phN",
+ prefix, v->scsi.designator_len, v->scsi.designator);
if (!devname)
return ERR_PTR(-ENOMEM);
@@ -326,28 +322,6 @@ bl_open_udev_path(struct pnfs_block_volume *v)
return bdev;
}
-/*
- * Try to open the RH/Fedora specific dm-mpath udev path for this WWN, as the
- * wwn- links will only point to the first discovered SCSI device there.
- */
-static struct block_device *
-bl_open_dm_mpath_udev_path(struct pnfs_block_volume *v)
-{
- struct block_device *bdev;
- const char *devname;
-
- devname = kasprintf(GFP_KERNEL,
- "/dev/disk/by-id/dm-uuid-mpath-%d%*phN",
- v->scsi.designator_type,
- v->scsi.designator_len, v->scsi.designator);
- if (!devname)
- return ERR_PTR(-ENOMEM);
-
- bdev = blkdev_get_by_path(devname, FMODE_READ | FMODE_WRITE, NULL);
- kfree(devname);
- return bdev;
-}
-
static int
bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
@@ -360,9 +334,15 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
if (!bl_validate_designator(v))
return -EINVAL;
- bdev = bl_open_dm_mpath_udev_path(v);
+ /*
+ * Try to open the RH/Fedora specific dm-mpath udev path first, as the
+ * wwn- links will only point to the first discovered SCSI device there.
+ * On other distributions like Debian, the default SCSI by-id path will
+ * point to the dm-multipath device if one exists.
+ */
+ bdev = bl_open_path(v, "dm-uuid-mpath-0x");
if (IS_ERR(bdev))
- bdev = bl_open_udev_path(v);
+ bdev = bl_open_path(v, "wwn-0x");
if (IS_ERR(bdev))
return PTR_ERR(bdev);
d->bdev = bdev;
diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c
index ef9db135c649..6c977288cc28 100644
--- a/fs/nfs/blocklayout/rpc_pipefs.c
+++ b/fs/nfs/blocklayout/rpc_pipefs.c
@@ -27,7 +27,6 @@
*/
#include <linux/module.h>
-#include <linux/genhd.h>
#include <linux/blkdev.h>
#include "blocklayout.h"
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 86d856de1389..456af7d230cf 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -17,7 +17,6 @@
#include <linux/errno.h>
#include <linux/mutex.h>
#include <linux/freezer.h>
-#include <linux/kthread.h>
#include <linux/sunrpc/svcauth_gss.h>
#include <linux/sunrpc/bc_xprt.h>
@@ -45,18 +44,18 @@ static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net)
int ret;
struct nfs_net *nn = net_generic(net, nfs_net_id);
- ret = svc_create_xprt(serv, "tcp", net, PF_INET,
- nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS,
- cred);
+ ret = svc_xprt_create(serv, "tcp", net, PF_INET,
+ nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS,
+ cred);
if (ret <= 0)
goto out_err;
nn->nfs_callback_tcpport = ret;
dprintk("NFS: Callback listener port = %u (af %u, net %x)\n",
nn->nfs_callback_tcpport, PF_INET, net->ns.inum);
- ret = svc_create_xprt(serv, "tcp", net, PF_INET6,
- nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS,
- cred);
+ ret = svc_xprt_create(serv, "tcp", net, PF_INET6,
+ nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS,
+ cred);
if (ret > 0) {
nn->nfs_callback_tcpport6 = ret;
dprintk("NFS: Callback listener port = %u (af %u, net %x)\n",
@@ -92,8 +91,8 @@ nfs4_callback_svc(void *vrqstp)
continue;
svc_process(rqstp);
}
+
svc_exit_thread(rqstp);
- module_put_and_exit(0);
return 0;
}
@@ -136,8 +135,8 @@ nfs41_callback_svc(void *vrqstp)
finish_wait(&serv->sv_cb_waitq, &wq);
}
}
+
svc_exit_thread(rqstp);
- module_put_and_exit(0);
return 0;
}
@@ -169,12 +168,12 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt,
if (nrservs < NFS4_MIN_NR_CALLBACK_THREADS)
nrservs = NFS4_MIN_NR_CALLBACK_THREADS;
- if (serv->sv_nrthreads-1 == nrservs)
+ if (serv->sv_nrthreads == nrservs)
return 0;
- ret = serv->sv_ops->svo_setup(serv, NULL, nrservs);
+ ret = svc_set_num_threads(serv, NULL, nrservs);
if (ret) {
- serv->sv_ops->svo_setup(serv, NULL, 0);
+ svc_set_num_threads(serv, NULL, 0);
return ret;
}
dprintk("nfs_callback_up: service started\n");
@@ -189,7 +188,7 @@ static void nfs_callback_down_net(u32 minorversion, struct svc_serv *serv, struc
return;
dprintk("NFS: destroy per-net callback data; net=%x\n", net->ns.inum);
- svc_shutdown_net(serv, net);
+ svc_xprt_destroy_all(serv, net);
}
static int nfs_callback_up_net(int minorversion, struct svc_serv *serv,
@@ -232,59 +231,17 @@ err_bind:
return ret;
}
-static const struct svc_serv_ops nfs40_cb_sv_ops = {
- .svo_function = nfs4_callback_svc,
- .svo_enqueue_xprt = svc_xprt_do_enqueue,
- .svo_setup = svc_set_num_threads_sync,
- .svo_module = THIS_MODULE,
-};
-#if defined(CONFIG_NFS_V4_1)
-static const struct svc_serv_ops nfs41_cb_sv_ops = {
- .svo_function = nfs41_callback_svc,
- .svo_enqueue_xprt = svc_xprt_do_enqueue,
- .svo_setup = svc_set_num_threads_sync,
- .svo_module = THIS_MODULE,
-};
-
-static const struct svc_serv_ops *nfs4_cb_sv_ops[] = {
- [0] = &nfs40_cb_sv_ops,
- [1] = &nfs41_cb_sv_ops,
-};
-#else
-static const struct svc_serv_ops *nfs4_cb_sv_ops[] = {
- [0] = &nfs40_cb_sv_ops,
- [1] = NULL,
-};
-#endif
-
static struct svc_serv *nfs_callback_create_svc(int minorversion)
{
struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion];
- const struct svc_serv_ops *sv_ops;
+ int (*threadfn)(void *data);
struct svc_serv *serv;
/*
* Check whether we're already up and running.
*/
- if (cb_info->serv) {
- /*
- * Note: increase service usage, because later in case of error
- * svc_destroy() will be called.
- */
- svc_get(cb_info->serv);
- return cb_info->serv;
- }
-
- switch (minorversion) {
- case 0:
- sv_ops = nfs4_cb_sv_ops[0];
- break;
- default:
- sv_ops = nfs4_cb_sv_ops[1];
- }
-
- if (sv_ops == NULL)
- return ERR_PTR(-ENOTSUPP);
+ if (cb_info->serv)
+ return svc_get(cb_info->serv);
/*
* Sanity check: if there's no task,
@@ -294,7 +251,16 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion)
printk(KERN_WARNING "nfs_callback_create_svc: no kthread, %d users??\n",
cb_info->users);
- serv = svc_create_pooled(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, sv_ops);
+ threadfn = nfs4_callback_svc;
+#if defined(CONFIG_NFS_V4_1)
+ if (minorversion)
+ threadfn = nfs41_callback_svc;
+#else
+ if (minorversion)
+ return ERR_PTR(-ENOTSUPP);
+#endif
+ serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE,
+ threadfn);
if (!serv) {
printk(KERN_ERR "nfs_callback_create_svc: create service failed\n");
return ERR_PTR(-ENOMEM);
@@ -335,16 +301,10 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt)
goto err_start;
cb_info->users++;
- /*
- * svc_create creates the svc_serv with sv_nrthreads == 1, and then
- * svc_prepare_thread increments that. So we need to call svc_destroy
- * on both success and failure so that the refcount is 1 when the
- * thread exits.
- */
err_net:
if (!cb_info->users)
cb_info->serv = NULL;
- svc_destroy(serv);
+ svc_put(serv);
err_create:
mutex_unlock(&nfs_callback_mutex);
return ret;
@@ -369,8 +329,8 @@ void nfs_callback_down(int minorversion, struct net *net)
cb_info->users--;
if (cb_info->users == 0) {
svc_get(serv);
- serv->sv_ops->svo_setup(serv, NULL, 0);
- svc_destroy(serv);
+ svc_set_num_threads(serv, NULL, 0);
+ svc_put(serv);
dprintk("nfs_callback_down: service destroyed\n");
cb_info->serv = NULL;
}
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index 6a2033131c06..ccd4f245cae2 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -170,7 +170,7 @@ struct cb_devicenotifyitem {
};
struct cb_devicenotifyargs {
- int ndevs;
+ uint32_t ndevs;
struct cb_devicenotifyitem *devs;
};
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 09c5b1cb3e07..c1eda73254e1 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -288,6 +288,7 @@ static u32 initiate_file_draining(struct nfs_client *clp,
rv = NFS4_OK;
break;
case -ENOENT:
+ set_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags);
/* Embrace your forgetfulness! */
rv = NFS4ERR_NOMATCHING_LAYOUT;
@@ -358,12 +359,11 @@ __be32 nfs4_callback_devicenotify(void *argp, void *resp,
struct cb_process_state *cps)
{
struct cb_devicenotifyargs *args = argp;
- int i;
+ const struct pnfs_layoutdriver_type *ld = NULL;
+ uint32_t i;
__be32 res = 0;
- struct nfs_client *clp = cps->clp;
- struct nfs_server *server = NULL;
- if (!clp) {
+ if (!cps->clp) {
res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
goto out;
}
@@ -371,23 +371,15 @@ __be32 nfs4_callback_devicenotify(void *argp, void *resp,
for (i = 0; i < args->ndevs; i++) {
struct cb_devicenotifyitem *dev = &args->devs[i];
- if (!server ||
- server->pnfs_curr_ld->id != dev->cbd_layout_type) {
- rcu_read_lock();
- list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
- if (server->pnfs_curr_ld &&
- server->pnfs_curr_ld->id == dev->cbd_layout_type) {
- rcu_read_unlock();
- goto found;
- }
- rcu_read_unlock();
- continue;
+ if (!ld || ld->id != dev->cbd_layout_type) {
+ pnfs_put_layoutdriver(ld);
+ ld = pnfs_find_layoutdriver(dev->cbd_layout_type);
+ if (!ld)
+ continue;
}
-
- found:
- nfs4_delete_deviceid(server->pnfs_curr_ld, clp, &dev->cbd_dev_id);
+ nfs4_delete_deviceid(ld, cps->clp, &dev->cbd_dev_id);
}
-
+ pnfs_put_layoutdriver(ld);
out:
kfree(args->devs);
return res;
@@ -710,7 +702,7 @@ __be32 nfs4_callback_offload(void *data, void *dummy,
struct nfs4_copy_state *copy, *tmp_copy;
bool found = false;
- copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS);
+ copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_KERNEL);
if (!copy)
return htonl(NFS4ERR_SERVERFAULT);
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index a67c41ec545f..d0cccddb7d08 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -258,11 +258,9 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
void *argp)
{
struct cb_devicenotifyargs *args = argp;
+ uint32_t tmp, n, i;
__be32 *p;
__be32 status = 0;
- u32 tmp;
- int n, i;
- args->ndevs = 0;
/* Num of device notifications */
p = xdr_inline_decode(xdr, sizeof(uint32_t));
@@ -271,12 +269,8 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
goto out;
}
n = ntohl(*p++);
- if (n <= 0)
- goto out;
- if (n > ULONG_MAX / sizeof(*args->devs)) {
- status = htonl(NFS4ERR_BADXDR);
+ if (n == 0)
goto out;
- }
args->devs = kmalloc_array(n, sizeof(*args->devs), GFP_KERNEL);
if (!args->devs) {
@@ -330,19 +324,21 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
dev->cbd_immediate = 0;
}
- args->ndevs++;
-
dprintk("%s: type %d layout 0x%x immediate %d\n",
__func__, dev->cbd_notify_type, dev->cbd_layout_type,
dev->cbd_immediate);
}
+ args->ndevs = n;
+ dprintk("%s: ndevs %d\n", __func__, args->ndevs);
+ return 0;
+err:
+ kfree(args->devs);
out:
+ args->devs = NULL;
+ args->ndevs = 0;
dprintk("%s: status %d ndevs %d\n",
__func__, ntohl(status), args->ndevs);
return status;
-err:
- kfree(args->devs);
- goto out;
}
static __be32 decode_sessionid(struct xdr_stream *xdr,
@@ -1069,6 +1065,7 @@ static const struct svc_procedure nfs4_callback_procedures1[] = {
.pc_func = nfs4_callback_compound,
.pc_encode = nfs4_encode_void,
.pc_argsize = 256,
+ .pc_argzero = 256,
.pc_ressize = 256,
.pc_xdrressize = NFS4_CALLBACK_BUFSIZE,
.pc_name = "COMPOUND",
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 1e4dc1ab9312..f50e025ae406 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -177,14 +177,13 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
INIT_LIST_HEAD(&clp->cl_superblocks);
clp->cl_rpcclient = ERR_PTR(-EINVAL);
+ clp->cl_flags = cl_init->init_flags;
clp->cl_proto = cl_init->proto;
clp->cl_nconnect = cl_init->nconnect;
clp->cl_max_connect = cl_init->max_connect ? cl_init->max_connect : 1;
clp->cl_net = get_net(cl_init->net);
clp->cl_principal = "*";
- nfs_fscache_get_client_cookie(clp);
-
return clp;
error_cleanup:
@@ -238,8 +237,6 @@ static void pnfs_init_server(struct nfs_server *server)
*/
void nfs_free_client(struct nfs_client *clp)
{
- nfs_fscache_release_client_cookie(clp);
-
/* -EIO all pending I/O */
if (!IS_ERR(clp->cl_rpcclient))
rpc_shutdown_client(clp->cl_rpcclient);
@@ -283,7 +280,7 @@ EXPORT_SYMBOL_GPL(nfs_put_client);
static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *data)
{
struct nfs_client *clp;
- const struct sockaddr *sap = data->addr;
+ const struct sockaddr *sap = (struct sockaddr *)data->addr;
struct nfs_net *nn = net_generic(data->net, nfs_net_id);
int error;
@@ -427,7 +424,6 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
list_add_tail(&new->cl_share_link,
&nn->nfs_client_list);
spin_unlock(&nn->nfs_client_lock);
- new->cl_flags = cl_init->init_flags;
return rpc_ops->init_client(new, cl_init);
}
@@ -670,7 +666,7 @@ static int nfs_init_server(struct nfs_server *server,
struct rpc_timeout timeparms;
struct nfs_client_initdata cl_init = {
.hostname = ctx->nfs_server.hostname,
- .addr = (const struct sockaddr *)&ctx->nfs_server.address,
+ .addr = &ctx->nfs_server._address,
.addrlen = ctx->nfs_server.addrlen,
.nfs_mod = ctx->nfs_mod,
.proto = ctx->nfs_server.protocol,
@@ -712,9 +708,9 @@ static int nfs_init_server(struct nfs_server *server,
}
if (ctx->rsize)
- server->rsize = nfs_block_size(ctx->rsize, NULL);
+ server->rsize = nfs_io_size(ctx->rsize, clp->cl_proto);
if (ctx->wsize)
- server->wsize = nfs_block_size(ctx->wsize, NULL);
+ server->wsize = nfs_io_size(ctx->wsize, clp->cl_proto);
server->acregmin = ctx->acregmin * HZ;
server->acregmax = ctx->acregmax * HZ;
@@ -759,18 +755,19 @@ error:
static void nfs_server_set_fsinfo(struct nfs_server *server,
struct nfs_fsinfo *fsinfo)
{
+ struct nfs_client *clp = server->nfs_client;
unsigned long max_rpc_payload, raw_max_rpc_payload;
/* Work out a lot of parameters */
if (server->rsize == 0)
- server->rsize = nfs_block_size(fsinfo->rtpref, NULL);
+ server->rsize = nfs_io_size(fsinfo->rtpref, clp->cl_proto);
if (server->wsize == 0)
- server->wsize = nfs_block_size(fsinfo->wtpref, NULL);
+ server->wsize = nfs_io_size(fsinfo->wtpref, clp->cl_proto);
if (fsinfo->rtmax >= 512 && server->rsize > fsinfo->rtmax)
- server->rsize = nfs_block_size(fsinfo->rtmax, NULL);
+ server->rsize = nfs_io_size(fsinfo->rtmax, clp->cl_proto);
if (fsinfo->wtmax >= 512 && server->wsize > fsinfo->wtmax)
- server->wsize = nfs_block_size(fsinfo->wtmax, NULL);
+ server->wsize = nfs_io_size(fsinfo->wtmax, clp->cl_proto);
raw_max_rpc_payload = rpc_max_payload(server->client);
max_rpc_payload = nfs_block_size(raw_max_rpc_payload, NULL);
@@ -860,6 +857,14 @@ static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, str
server->namelen = pathinfo.max_namelen;
}
+ if (clp->rpc_ops->discover_trunking != NULL &&
+ (server->caps & NFS_CAP_FS_LOCATIONS &&
+ (server->flags & NFS_MOUNT_TRUNK_DISCOVERY))) {
+ error = clp->rpc_ops->discover_trunking(server, mntfh);
+ if (error < 0)
+ return error;
+ }
+
return 0;
}
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 7c9eb679dbdb..ead8a0e06abf 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -228,8 +228,7 @@ again:
*
*/
void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
- fmode_t type,
- const nfs4_stateid *stateid,
+ fmode_t type, const nfs4_stateid *stateid,
unsigned long pagemod_limit)
{
struct nfs_delegation *delegation;
@@ -239,25 +238,24 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
delegation = rcu_dereference(NFS_I(inode)->delegation);
if (delegation != NULL) {
spin_lock(&delegation->lock);
- if (nfs4_is_valid_delegation(delegation, 0)) {
- nfs4_stateid_copy(&delegation->stateid, stateid);
- delegation->type = type;
- delegation->pagemod_limit = pagemod_limit;
- oldcred = delegation->cred;
- delegation->cred = get_cred(cred);
- clear_bit(NFS_DELEGATION_NEED_RECLAIM,
- &delegation->flags);
- spin_unlock(&delegation->lock);
- rcu_read_unlock();
- put_cred(oldcred);
- trace_nfs4_reclaim_delegation(inode, type);
- return;
- }
- /* We appear to have raced with a delegation return. */
+ nfs4_stateid_copy(&delegation->stateid, stateid);
+ delegation->type = type;
+ delegation->pagemod_limit = pagemod_limit;
+ oldcred = delegation->cred;
+ delegation->cred = get_cred(cred);
+ clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags);
+ if (test_and_clear_bit(NFS_DELEGATION_REVOKED,
+ &delegation->flags))
+ atomic_long_inc(&nfs_active_delegations);
spin_unlock(&delegation->lock);
+ rcu_read_unlock();
+ put_cred(oldcred);
+ trace_nfs4_reclaim_delegation(inode, type);
+ } else {
+ rcu_read_unlock();
+ nfs_inode_set_delegation(inode, cred, type, stateid,
+ pagemod_limit);
}
- rcu_read_unlock();
- nfs_inode_set_delegation(inode, cred, type, stateid, pagemod_limit);
}
static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync)
@@ -439,7 +437,7 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
struct nfs_delegation *freeme = NULL;
int status = 0;
- delegation = kmalloc(sizeof(*delegation), GFP_NOFS);
+ delegation = kmalloc(sizeof(*delegation), GFP_KERNEL_ACCOUNT);
if (delegation == NULL)
return -ENOMEM;
nfs4_stateid_copy(&delegation->stateid, stateid);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 731d31015b6a..f594dac436a7 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -18,6 +18,7 @@
* 6 Jun 1999 Cache readdir lookups in the page cache. -DaveM
*/
+#include <linux/compat.h>
#include <linux/module.h>
#include <linux/time.h>
#include <linux/errno.h>
@@ -38,6 +39,7 @@
#include <linux/sched.h>
#include <linux/kmemleak.h>
#include <linux/xattr.h>
+#include <linux/hash.h>
#include "delegation.h"
#include "iostat.h"
@@ -53,7 +55,7 @@ static int nfs_closedir(struct inode *, struct file *);
static int nfs_readdir(struct file *, struct dir_context *);
static int nfs_fsync_dir(struct file *, loff_t, loff_t, int);
static loff_t nfs_llseek_dir(struct file *, loff_t, int);
-static void nfs_readdir_clear_array(struct page*);
+static void nfs_readdir_free_folio(struct folio *);
const struct file_operations nfs_dir_operations = {
.llseek = nfs_llseek_dir,
@@ -65,28 +67,29 @@ const struct file_operations nfs_dir_operations = {
};
const struct address_space_operations nfs_dir_aops = {
- .freepage = nfs_readdir_clear_array,
+ .free_folio = nfs_readdir_free_folio,
};
-static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir)
+#define NFS_INIT_DTSIZE PAGE_SIZE
+
+static struct nfs_open_dir_context *
+alloc_nfs_open_dir_context(struct inode *dir)
{
struct nfs_inode *nfsi = NFS_I(dir);
struct nfs_open_dir_context *ctx;
- ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL_ACCOUNT);
if (ctx != NULL) {
- ctx->duped = 0;
ctx->attr_gencount = nfsi->attr_gencount;
- ctx->dir_cookie = 0;
- ctx->dup_cookie = 0;
- ctx->page_index = 0;
+ ctx->dtsize = NFS_INIT_DTSIZE;
spin_lock(&dir->i_lock);
if (list_empty(&nfsi->open_files) &&
(nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER))
nfs_set_cache_invalid(dir,
NFS_INO_INVALID_DATA |
NFS_INO_REVAL_FORCED);
- list_add(&ctx->list, &nfsi->open_files);
- clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags);
+ list_add_tail_rcu(&ctx->list, &nfsi->open_files);
+ memcpy(ctx->verf, nfsi->cookieverf, sizeof(ctx->verf));
spin_unlock(&dir->i_lock);
return ctx;
}
@@ -96,9 +99,9 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir
static void put_nfs_open_dir_context(struct inode *dir, struct nfs_open_dir_context *ctx)
{
spin_lock(&dir->i_lock);
- list_del(&ctx->list);
+ list_del_rcu(&ctx->list);
spin_unlock(&dir->i_lock);
- kfree(ctx);
+ kfree_rcu(ctx, rcu_head);
}
/*
@@ -140,6 +143,7 @@ struct nfs_cache_array_entry {
};
struct nfs_cache_array {
+ u64 change_attr;
u64 last_cookie;
unsigned int size;
unsigned char page_full : 1,
@@ -153,11 +157,10 @@ struct nfs_readdir_descriptor {
struct page *page;
struct dir_context *ctx;
pgoff_t page_index;
+ pgoff_t page_index_max;
u64 dir_cookie;
u64 last_cookie;
- u64 dup_cookie;
loff_t current_index;
- loff_t prev_index;
__be32 verf[NFS_DIR_VERIFIER_SIZE];
unsigned long dir_verifier;
@@ -165,23 +168,47 @@ struct nfs_readdir_descriptor {
unsigned long gencount;
unsigned long attr_gencount;
unsigned int cache_entry_index;
- signed char duped;
+ unsigned int buffer_fills;
+ unsigned int dtsize;
+ bool clear_cache;
bool plus;
+ bool eob;
bool eof;
};
-static void nfs_readdir_array_init(struct nfs_cache_array *array)
+static void nfs_set_dtsize(struct nfs_readdir_descriptor *desc, unsigned int sz)
+{
+ struct nfs_server *server = NFS_SERVER(file_inode(desc->file));
+ unsigned int maxsize = server->dtsize;
+
+ if (sz > maxsize)
+ sz = maxsize;
+ if (sz < NFS_MIN_FILE_IO_SIZE)
+ sz = NFS_MIN_FILE_IO_SIZE;
+ desc->dtsize = sz;
+}
+
+static void nfs_shrink_dtsize(struct nfs_readdir_descriptor *desc)
+{
+ nfs_set_dtsize(desc, desc->dtsize >> 1);
+}
+
+static void nfs_grow_dtsize(struct nfs_readdir_descriptor *desc)
{
- memset(array, 0, sizeof(struct nfs_cache_array));
+ nfs_set_dtsize(desc, desc->dtsize << 1);
}
-static void nfs_readdir_page_init_array(struct page *page, u64 last_cookie)
+static void nfs_readdir_page_init_array(struct page *page, u64 last_cookie,
+ u64 change_attr)
{
struct nfs_cache_array *array;
array = kmap_atomic(page);
- nfs_readdir_array_init(array);
+ array->change_attr = change_attr;
array->last_cookie = last_cookie;
+ array->size = 0;
+ array->page_full = 0;
+ array->page_is_eof = 0;
array->cookies_are_ordered = 1;
kunmap_atomic(array);
}
@@ -189,25 +216,36 @@ static void nfs_readdir_page_init_array(struct page *page, u64 last_cookie)
/*
* we are freeing strings created by nfs_add_to_readdir_array()
*/
-static
-void nfs_readdir_clear_array(struct page *page)
+static void nfs_readdir_clear_array(struct page *page)
{
struct nfs_cache_array *array;
- int i;
+ unsigned int i;
array = kmap_atomic(page);
for (i = 0; i < array->size; i++)
kfree(array->array[i].name);
- nfs_readdir_array_init(array);
+ array->size = 0;
kunmap_atomic(array);
}
+static void nfs_readdir_free_folio(struct folio *folio)
+{
+ nfs_readdir_clear_array(&folio->page);
+}
+
+static void nfs_readdir_page_reinit_array(struct page *page, u64 last_cookie,
+ u64 change_attr)
+{
+ nfs_readdir_clear_array(page);
+ nfs_readdir_page_init_array(page, last_cookie, change_attr);
+}
+
static struct page *
nfs_readdir_page_array_alloc(u64 last_cookie, gfp_t gfp_flags)
{
struct page *page = alloc_page(gfp_flags);
if (page)
- nfs_readdir_page_init_array(page, last_cookie);
+ nfs_readdir_page_init_array(page, last_cookie, 0);
return page;
}
@@ -219,6 +257,11 @@ static void nfs_readdir_page_array_free(struct page *page)
}
}
+static u64 nfs_readdir_array_index_cookie(struct nfs_cache_array *array)
+{
+ return array->size == 0 ? array->last_cookie : array->array[0].cookie;
+}
+
static void nfs_readdir_array_set_eof(struct nfs_cache_array *array)
{
array->page_is_eof = 1;
@@ -248,36 +291,40 @@ static const char *nfs_readdir_copy_name(const char *name, unsigned int len)
return ret;
}
+static size_t nfs_readdir_array_maxentries(void)
+{
+ return (PAGE_SIZE - sizeof(struct nfs_cache_array)) /
+ sizeof(struct nfs_cache_array_entry);
+}
+
/*
* Check that the next array entry lies entirely within the page bounds
*/
static int nfs_readdir_array_can_expand(struct nfs_cache_array *array)
{
- struct nfs_cache_array_entry *cache_entry;
-
if (array->page_full)
return -ENOSPC;
- cache_entry = &array->array[array->size + 1];
- if ((char *)cache_entry - (char *)array > PAGE_SIZE) {
+ if (array->size == nfs_readdir_array_maxentries()) {
array->page_full = 1;
return -ENOSPC;
}
return 0;
}
-static
-int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
+static int nfs_readdir_page_array_append(struct page *page,
+ const struct nfs_entry *entry,
+ u64 *cookie)
{
struct nfs_cache_array *array;
struct nfs_cache_array_entry *cache_entry;
const char *name;
- int ret;
+ int ret = -ENOMEM;
name = nfs_readdir_copy_name(entry->name, entry->len);
- if (!name)
- return -ENOMEM;
array = kmap_atomic(page);
+ if (!name)
+ goto out;
ret = nfs_readdir_array_can_expand(array);
if (ret) {
kfree(name);
@@ -285,7 +332,7 @@ int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
}
cache_entry = &array->array[array->size];
- cache_entry->cookie = entry->prev_cookie;
+ cache_entry->cookie = array->last_cookie;
cache_entry->ino = entry->ino;
cache_entry->d_type = entry->d_type;
cache_entry->name_len = entry->len;
@@ -297,23 +344,72 @@ int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
if (entry->eof != 0)
nfs_readdir_array_set_eof(array);
out:
+ *cookie = array->last_cookie;
+ kunmap_atomic(array);
+ return ret;
+}
+
+#define NFS_READDIR_COOKIE_MASK (U32_MAX >> 14)
+/*
+ * Hash algorithm allowing content addressable access to sequences
+ * of directory cookies. Content is addressed by the value of the
+ * cookie index of the first readdir entry in a page.
+ *
+ * We select only the first 18 bits to avoid issues with excessive
+ * memory use for the page cache XArray. 18 bits should allow the caching
+ * of 262144 pages of sequences of readdir entries. Since each page holds
+ * 127 readdir entries for a typical 64-bit system, that works out to a
+ * cache of ~ 33 million entries per directory.
+ */
+static pgoff_t nfs_readdir_page_cookie_hash(u64 cookie)
+{
+ if (cookie == 0)
+ return 0;
+ return hash_64(cookie, 18);
+}
+
+static bool nfs_readdir_page_validate(struct page *page, u64 last_cookie,
+ u64 change_attr)
+{
+ struct nfs_cache_array *array = kmap_atomic(page);
+ int ret = true;
+
+ if (array->change_attr != change_attr)
+ ret = false;
+ if (nfs_readdir_array_index_cookie(array) != last_cookie)
+ ret = false;
kunmap_atomic(array);
return ret;
}
+static void nfs_readdir_page_unlock_and_put(struct page *page)
+{
+ unlock_page(page);
+ put_page(page);
+}
+
+static void nfs_readdir_page_init_and_validate(struct page *page, u64 cookie,
+ u64 change_attr)
+{
+ if (PageUptodate(page)) {
+ if (nfs_readdir_page_validate(page, cookie, change_attr))
+ return;
+ nfs_readdir_clear_array(page);
+ }
+ nfs_readdir_page_init_array(page, cookie, change_attr);
+ SetPageUptodate(page);
+}
+
static struct page *nfs_readdir_page_get_locked(struct address_space *mapping,
- pgoff_t index, u64 last_cookie)
+ u64 cookie, u64 change_attr)
{
+ pgoff_t index = nfs_readdir_page_cookie_hash(cookie);
struct page *page;
page = grab_cache_page(mapping, index);
- if (page && !PageUptodate(page)) {
- nfs_readdir_page_init_array(page, last_cookie);
- if (invalidate_inode_pages2_range(mapping, index + 1, -1) < 0)
- nfs_zap_mapping(mapping->host, mapping);
- SetPageUptodate(page);
- }
-
+ if (!page)
+ return NULL;
+ nfs_readdir_page_init_and_validate(page, cookie, change_attr);
return page;
}
@@ -348,24 +444,19 @@ static void nfs_readdir_page_set_eof(struct page *page)
kunmap_atomic(array);
}
-static void nfs_readdir_page_unlock_and_put(struct page *page)
-{
- unlock_page(page);
- put_page(page);
-}
-
static struct page *nfs_readdir_page_get_next(struct address_space *mapping,
- pgoff_t index, u64 cookie)
+ u64 cookie, u64 change_attr)
{
+ pgoff_t index = nfs_readdir_page_cookie_hash(cookie);
struct page *page;
- page = nfs_readdir_page_get_locked(mapping, index, cookie);
- if (page) {
- if (nfs_readdir_page_last_cookie(page) == cookie)
- return page;
- nfs_readdir_page_unlock_and_put(page);
- }
- return NULL;
+ page = grab_cache_page_nowait(mapping, index);
+ if (!page)
+ return NULL;
+ nfs_readdir_page_init_and_validate(page, cookie, change_attr);
+ if (nfs_readdir_page_last_cookie(page) != cookie)
+ nfs_readdir_page_reinit_array(page, cookie, change_attr);
+ return page;
}
static inline
@@ -387,6 +478,25 @@ bool nfs_readdir_use_cookie(const struct file *filp)
return true;
}
+static void nfs_readdir_seek_next_array(struct nfs_cache_array *array,
+ struct nfs_readdir_descriptor *desc)
+{
+ if (array->page_full) {
+ desc->last_cookie = array->last_cookie;
+ desc->current_index += array->size;
+ desc->cache_entry_index = 0;
+ desc->page_index++;
+ } else
+ desc->last_cookie = nfs_readdir_array_index_cookie(array);
+}
+
+static void nfs_readdir_rewind_search(struct nfs_readdir_descriptor *desc)
+{
+ desc->current_index = 0;
+ desc->last_cookie = 0;
+ desc->page_index = 0;
+}
+
static int nfs_readdir_search_for_pos(struct nfs_cache_array *array,
struct nfs_readdir_descriptor *desc)
{
@@ -398,6 +508,7 @@ static int nfs_readdir_search_for_pos(struct nfs_cache_array *array,
if (diff >= array->size) {
if (array->page_is_eof)
goto out_eof;
+ nfs_readdir_seek_next_array(array, desc);
return -EAGAIN;
}
@@ -410,16 +521,6 @@ out_eof:
return -EBADCOOKIE;
}
-static bool
-nfs_readdir_inode_mapping_valid(struct nfs_inode *nfsi)
-{
- if (nfsi->cache_validity & (NFS_INO_INVALID_CHANGE |
- NFS_INO_INVALID_DATA))
- return false;
- smp_rmb();
- return !test_bit(NFS_INO_INVALIDATING, &nfsi->flags);
-}
-
static bool nfs_readdir_array_cookie_in_range(struct nfs_cache_array *array,
u64 cookie)
{
@@ -436,8 +537,7 @@ static bool nfs_readdir_array_cookie_in_range(struct nfs_cache_array *array,
static int nfs_readdir_search_for_cookie(struct nfs_cache_array *array,
struct nfs_readdir_descriptor *desc)
{
- int i;
- loff_t new_pos;
+ unsigned int i;
int status = -EAGAIN;
if (!nfs_readdir_array_cookie_in_range(array, desc->dir_cookie))
@@ -445,33 +545,10 @@ static int nfs_readdir_search_for_cookie(struct nfs_cache_array *array,
for (i = 0; i < array->size; i++) {
if (array->array[i].cookie == desc->dir_cookie) {
- struct nfs_inode *nfsi = NFS_I(file_inode(desc->file));
-
- new_pos = desc->current_index + i;
- if (desc->attr_gencount != nfsi->attr_gencount ||
- !nfs_readdir_inode_mapping_valid(nfsi)) {
- desc->duped = 0;
- desc->attr_gencount = nfsi->attr_gencount;
- } else if (new_pos < desc->prev_index) {
- if (desc->duped > 0
- && desc->dup_cookie == desc->dir_cookie) {
- if (printk_ratelimit()) {
- pr_notice("NFS: directory %pD2 contains a readdir loop."
- "Please contact your server vendor. "
- "The file: %s has duplicate cookie %llu\n",
- desc->file, array->array[i].name, desc->dir_cookie);
- }
- status = -ELOOP;
- goto out;
- }
- desc->dup_cookie = desc->dir_cookie;
- desc->duped = -1;
- }
if (nfs_readdir_use_cookie(desc->file))
desc->ctx->pos = desc->dir_cookie;
else
- desc->ctx->pos = new_pos;
- desc->prev_index = new_pos;
+ desc->ctx->pos = desc->current_index + i;
desc->cache_entry_index = i;
return 0;
}
@@ -481,8 +558,8 @@ check_eof:
status = -EBADCOOKIE;
if (desc->dir_cookie == array->last_cookie)
desc->eof = true;
- }
-out:
+ } else
+ nfs_readdir_seek_next_array(array, desc);
return status;
}
@@ -498,11 +575,6 @@ static int nfs_readdir_search_array(struct nfs_readdir_descriptor *desc)
else
status = nfs_readdir_search_for_cookie(array, desc);
- if (status == -EAGAIN) {
- desc->last_cookie = array->last_cookie;
- desc->current_index += array->size;
- desc->page_index++;
- }
kunmap_atomic(array);
return status;
}
@@ -538,7 +610,6 @@ static int nfs_readdir_xdr_filler(struct nfs_readdir_descriptor *desc,
/* We requested READDIRPLUS, but the server doesn't grok it */
if (error == -ENOTSUPP && desc->plus) {
NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS;
- clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
desc->plus = arg.plus = false;
goto again;
}
@@ -588,51 +659,68 @@ int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
return 1;
}
-static
-bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx)
+#define NFS_READDIR_CACHE_USAGE_THRESHOLD (8UL)
+
+static bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx,
+ unsigned int cache_hits,
+ unsigned int cache_misses)
{
if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS))
return false;
- if (test_and_clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags))
- return true;
- if (ctx->pos == 0)
+ if (ctx->pos == 0 ||
+ cache_hits + cache_misses > NFS_READDIR_CACHE_USAGE_THRESHOLD)
return true;
return false;
}
/*
- * This function is called by the lookup and getattr code to request the
+ * This function is called by the getattr code to request the
* use of readdirplus to accelerate any future lookups in the same
* directory.
*/
-void nfs_advise_use_readdirplus(struct inode *dir)
+void nfs_readdir_record_entry_cache_hit(struct inode *dir)
{
struct nfs_inode *nfsi = NFS_I(dir);
+ struct nfs_open_dir_context *ctx;
if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
- !list_empty(&nfsi->open_files))
- set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
+ S_ISDIR(dir->i_mode)) {
+ rcu_read_lock();
+ list_for_each_entry_rcu (ctx, &nfsi->open_files, list)
+ atomic_inc(&ctx->cache_hits);
+ rcu_read_unlock();
+ }
}
/*
* This function is mainly for use by nfs_getattr().
*
* If this is an 'ls -l', we want to force use of readdirplus.
- * Do this by checking if there is an active file descriptor
- * and calling nfs_advise_use_readdirplus, then forcing a
- * cache flush.
*/
-void nfs_force_use_readdirplus(struct inode *dir)
+void nfs_readdir_record_entry_cache_miss(struct inode *dir)
{
struct nfs_inode *nfsi = NFS_I(dir);
+ struct nfs_open_dir_context *ctx;
if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
- !list_empty(&nfsi->open_files)) {
- set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
- set_bit(NFS_INO_FORCE_READDIR, &nfsi->flags);
+ S_ISDIR(dir->i_mode)) {
+ rcu_read_lock();
+ list_for_each_entry_rcu (ctx, &nfsi->open_files, list)
+ atomic_inc(&ctx->cache_misses);
+ rcu_read_unlock();
}
}
+static void nfs_lookup_advise_force_readdirplus(struct inode *dir,
+ unsigned int flags)
+{
+ if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
+ return;
+ if (flags & (LOOKUP_EXCL | LOOKUP_PARENT | LOOKUP_REVAL))
+ return;
+ nfs_readdir_record_entry_cache_miss(dir);
+}
+
static
void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry,
unsigned long dir_verifier)
@@ -683,8 +771,12 @@ again:
status = nfs_refresh_inode(d_inode(dentry), entry->fattr);
if (!status)
nfs_setsecurity(d_inode(dentry), entry->fattr);
+ trace_nfs_readdir_lookup_revalidate(d_inode(parent),
+ dentry, 0, status);
goto out;
} else {
+ trace_nfs_readdir_lookup_revalidate_failed(
+ d_inode(parent), dentry, 0);
d_invalidate(dentry);
dput(dentry);
dentry = NULL;
@@ -706,22 +798,38 @@ again:
dentry = alias;
}
nfs_set_verifier(dentry, dir_verifier);
+ trace_nfs_readdir_lookup(d_inode(parent), dentry, 0);
out:
dput(dentry);
}
+static int nfs_readdir_entry_decode(struct nfs_readdir_descriptor *desc,
+ struct nfs_entry *entry,
+ struct xdr_stream *stream)
+{
+ int ret;
+
+ if (entry->fattr->label)
+ entry->fattr->label->len = NFS4_MAXLABELLEN;
+ ret = xdr_decode(desc, entry, stream);
+ if (ret || !desc->plus)
+ return ret;
+ nfs_prime_dcache(file_dentry(desc->file), entry, desc->dir_verifier);
+ return 0;
+}
+
/* Perform conversion from xdr to cache array */
static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc,
struct nfs_entry *entry,
- struct page **xdr_pages,
- unsigned int buflen,
- struct page **arrays,
- size_t narrays)
+ struct page **xdr_pages, unsigned int buflen,
+ struct page **arrays, size_t narrays,
+ u64 change_attr)
{
struct address_space *mapping = desc->file->f_mapping;
struct xdr_stream stream;
struct xdr_buf buf;
struct page *scratch, *new, *page = *arrays;
+ u64 cookie;
int status;
scratch = alloc_page(GFP_KERNEL);
@@ -732,54 +840,50 @@ static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc,
xdr_set_scratch_page(&stream, scratch);
do {
- if (entry->fattr->label)
- entry->fattr->label->len = NFS4_MAXLABELLEN;
-
- status = xdr_decode(desc, entry, &stream);
+ status = nfs_readdir_entry_decode(desc, entry, &stream);
if (status != 0)
break;
- if (desc->plus)
- nfs_prime_dcache(file_dentry(desc->file), entry,
- desc->dir_verifier);
-
- status = nfs_readdir_add_to_array(entry, page);
+ status = nfs_readdir_page_array_append(page, entry, &cookie);
if (status != -ENOSPC)
continue;
if (page->mapping != mapping) {
if (!--narrays)
break;
- new = nfs_readdir_page_array_alloc(entry->prev_cookie,
- GFP_KERNEL);
+ new = nfs_readdir_page_array_alloc(cookie, GFP_KERNEL);
if (!new)
break;
arrays++;
*arrays = page = new;
} else {
- new = nfs_readdir_page_get_next(mapping,
- page->index + 1,
- entry->prev_cookie);
+ new = nfs_readdir_page_get_next(mapping, cookie,
+ change_attr);
if (!new)
break;
if (page != *arrays)
nfs_readdir_page_unlock_and_put(page);
page = new;
}
- status = nfs_readdir_add_to_array(entry, page);
+ desc->page_index_max++;
+ status = nfs_readdir_page_array_append(page, entry, &cookie);
} while (!status && !entry->eof);
switch (status) {
case -EBADCOOKIE:
- if (entry->eof) {
- nfs_readdir_page_set_eof(page);
- status = 0;
- }
- break;
- case -ENOSPC:
+ if (!entry->eof)
+ break;
+ nfs_readdir_page_set_eof(page);
+ fallthrough;
case -EAGAIN:
status = 0;
break;
+ case -ENOSPC:
+ status = 0;
+ if (!desc->plus)
+ break;
+ while (!nfs_readdir_entry_decode(desc, entry, &stream))
+ ;
}
if (page != *arrays)
@@ -825,12 +929,14 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
__be32 *verf_arg, __be32 *verf_res,
struct page **arrays, size_t narrays)
{
+ u64 change_attr;
struct page **pages;
struct page *page = *arrays;
struct nfs_entry *entry;
size_t array_size;
struct inode *inode = file_inode(desc->file);
- size_t dtsize = NFS_SERVER(inode)->dtsize;
+ unsigned int dtsize = desc->dtsize;
+ unsigned int pglen;
int status = -ENOMEM;
entry = kzalloc(sizeof(*entry), GFP_KERNEL);
@@ -848,26 +954,21 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
if (!pages)
goto out;
- do {
- unsigned int pglen;
- status = nfs_readdir_xdr_filler(desc, verf_arg, entry->cookie,
- pages, dtsize,
- verf_res);
- if (status < 0)
- break;
-
- pglen = status;
- if (pglen == 0) {
- nfs_readdir_page_set_eof(page);
- break;
- }
-
- verf_arg = verf_res;
+ change_attr = inode_peek_iversion_raw(inode);
+ status = nfs_readdir_xdr_filler(desc, verf_arg, entry->cookie, pages,
+ dtsize, verf_res);
+ if (status < 0)
+ goto free_pages;
+ pglen = status;
+ if (pglen != 0)
status = nfs_readdir_page_filler(desc, entry, pages, pglen,
- arrays, narrays);
- } while (!status && nfs_readdir_page_needs_filling(page));
+ arrays, narrays, change_attr);
+ else
+ nfs_readdir_page_set_eof(page);
+ desc->buffer_fills++;
+free_pages:
nfs_readdir_free_pages(pages, array_size);
out:
nfs_free_fattr(entry->fattr);
@@ -892,9 +993,17 @@ nfs_readdir_page_unlock_and_put_cached(struct nfs_readdir_descriptor *desc)
static struct page *
nfs_readdir_page_get_cached(struct nfs_readdir_descriptor *desc)
{
- return nfs_readdir_page_get_locked(desc->file->f_mapping,
- desc->page_index,
- desc->last_cookie);
+ struct address_space *mapping = desc->file->f_mapping;
+ u64 change_attr = inode_peek_iversion_raw(mapping->host);
+ u64 cookie = desc->last_cookie;
+ struct page *page;
+
+ page = nfs_readdir_page_get_locked(mapping, cookie, change_attr);
+ if (!page)
+ return NULL;
+ if (desc->clear_cache && !nfs_readdir_page_needs_filling(page))
+ nfs_readdir_page_reinit_array(page, cookie, change_attr);
+ return page;
}
/*
@@ -912,13 +1021,23 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
if (!desc->page)
return -ENOMEM;
if (nfs_readdir_page_needs_filling(desc->page)) {
+ /* Grow the dtsize if we had to go back for more pages */
+ if (desc->page_index == desc->page_index_max)
+ nfs_grow_dtsize(desc);
+ desc->page_index_max = desc->page_index;
+ trace_nfs_readdir_cache_fill(desc->file, nfsi->cookieverf,
+ desc->last_cookie,
+ desc->page->index, desc->dtsize);
res = nfs_readdir_xdr_to_array(desc, nfsi->cookieverf, verf,
&desc->page, 1);
if (res < 0) {
nfs_readdir_page_unlock_and_put_cached(desc);
+ trace_nfs_readdir_cache_fill_done(inode, res);
if (res == -EBADCOOKIE || res == -ENOTSYNC) {
invalidate_inode_pages2(desc->file->f_mapping);
- desc->page_index = 0;
+ nfs_readdir_rewind_search(desc);
+ trace_nfs_readdir_invalidate_cache_range(
+ inode, 0, MAX_LFS_FILESIZE);
return -EAGAIN;
}
return res;
@@ -926,9 +1045,16 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
/*
* Set the cookie verifier if the page cache was empty
*/
- if (desc->page_index == 0)
+ if (desc->last_cookie == 0 &&
+ memcmp(nfsi->cookieverf, verf, sizeof(nfsi->cookieverf))) {
memcpy(nfsi->cookieverf, verf,
sizeof(nfsi->cookieverf));
+ invalidate_inode_pages2_range(desc->file->f_mapping, 1,
+ -1);
+ trace_nfs_readdir_invalidate_cache_range(
+ inode, 1, MAX_LFS_FILESIZE);
+ }
+ desc->clear_cache = false;
}
res = nfs_readdir_search_array(desc);
if (res == 0)
@@ -937,34 +1063,12 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
return res;
}
-static bool nfs_readdir_dont_search_cache(struct nfs_readdir_descriptor *desc)
-{
- struct address_space *mapping = desc->file->f_mapping;
- struct inode *dir = file_inode(desc->file);
- unsigned int dtsize = NFS_SERVER(dir)->dtsize;
- loff_t size = i_size_read(dir);
-
- /*
- * Default to uncached readdir if the page cache is empty, and
- * we're looking for a non-zero cookie in a large directory.
- */
- return desc->dir_cookie != 0 && mapping->nrpages == 0 && size > dtsize;
-}
-
/* Search for desc->dir_cookie from the beginning of the page cache */
static int readdir_search_pagecache(struct nfs_readdir_descriptor *desc)
{
int res;
- if (nfs_readdir_dont_search_cache(desc))
- return -EBADCOOKIE;
-
do {
- if (desc->page_index == 0) {
- desc->current_index = 0;
- desc->prev_index = 0;
- desc->last_cookie = 0;
- }
res = find_and_lock_cache_page(desc);
} while (res == -EAGAIN);
return res;
@@ -978,34 +1082,35 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc,
{
struct file *file = desc->file;
struct nfs_cache_array *array;
- unsigned int i = 0;
+ unsigned int i;
- array = kmap(desc->page);
+ array = kmap_local_page(desc->page);
for (i = desc->cache_entry_index; i < array->size; i++) {
struct nfs_cache_array_entry *ent;
ent = &array->array[i];
if (!dir_emit(desc->ctx, ent->name, ent->name_len,
nfs_compat_user_ino64(ent->ino), ent->d_type)) {
- desc->eof = true;
+ desc->eob = true;
break;
}
memcpy(desc->verf, verf, sizeof(desc->verf));
- if (i < (array->size-1))
- desc->dir_cookie = array->array[i+1].cookie;
- else
+ if (i == array->size - 1) {
desc->dir_cookie = array->last_cookie;
+ nfs_readdir_seek_next_array(array, desc);
+ } else {
+ desc->dir_cookie = array->array[i + 1].cookie;
+ desc->last_cookie = array->array[0].cookie;
+ }
if (nfs_readdir_use_cookie(file))
desc->ctx->pos = desc->dir_cookie;
else
desc->ctx->pos++;
- if (desc->duped != 0)
- desc->duped = 1;
}
if (array->page_is_eof)
- desc->eof = true;
+ desc->eof = !desc->eob;
- kunmap(desc->page);
+ kunmap_local(array);
dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %llu\n",
(unsigned long long)desc->dir_cookie);
}
@@ -1040,26 +1145,63 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc)
goto out;
desc->page_index = 0;
+ desc->cache_entry_index = 0;
desc->last_cookie = desc->dir_cookie;
- desc->duped = 0;
+ desc->page_index_max = 0;
+
+ trace_nfs_readdir_uncached(desc->file, desc->verf, desc->last_cookie,
+ -1, desc->dtsize);
status = nfs_readdir_xdr_to_array(desc, desc->verf, verf, arrays, sz);
+ if (status < 0) {
+ trace_nfs_readdir_uncached_done(file_inode(desc->file), status);
+ goto out_free;
+ }
- for (i = 0; !desc->eof && i < sz && arrays[i]; i++) {
+ for (i = 0; !desc->eob && i < sz && arrays[i]; i++) {
desc->page = arrays[i];
nfs_do_filldir(desc, verf);
}
desc->page = NULL;
-
+ /*
+ * Grow the dtsize if we have to go back for more pages,
+ * or shrink it if we're reading too many.
+ */
+ if (!desc->eof) {
+ if (!desc->eob)
+ nfs_grow_dtsize(desc);
+ else if (desc->buffer_fills == 1 &&
+ i < (desc->page_index_max >> 1))
+ nfs_shrink_dtsize(desc);
+ }
+out_free:
for (i = 0; i < sz && arrays[i]; i++)
nfs_readdir_page_array_free(arrays[i]);
out:
+ if (!nfs_readdir_use_cookie(desc->file))
+ nfs_readdir_rewind_search(desc);
+ desc->page_index_max = -1;
kfree(arrays);
dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status);
return status;
}
+#define NFS_READDIR_CACHE_MISS_THRESHOLD (16UL)
+
+static bool nfs_readdir_handle_cache_misses(struct inode *inode,
+ struct nfs_readdir_descriptor *desc,
+ unsigned int cache_misses,
+ bool force_clear)
+{
+ if (desc->ctx->pos == 0 || !desc->plus)
+ return false;
+ if (cache_misses <= NFS_READDIR_CACHE_MISS_THRESHOLD && !force_clear)
+ return false;
+ trace_nfs_readdir_force_readdirplus(inode);
+ return true;
+}
+
/* The file offset position represents the dirent entry number. A
last cookie cache takes care of the common case of reading the
whole directory.
@@ -1071,7 +1213,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_open_dir_context *dir_ctx = file->private_data;
struct nfs_readdir_descriptor *desc;
- pgoff_t page_index;
+ unsigned int cache_hits, cache_misses;
+ bool force_clear;
int res;
dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n",
@@ -1084,11 +1227,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
* to either find the entry with the appropriate number or
* revalidate the cookie.
*/
- if (ctx->pos == 0 || nfs_attribute_cache_expired(inode)) {
- res = nfs_revalidate_mapping(inode, file->f_mapping);
- if (res < 0)
- goto out;
- }
+ nfs_revalidate_mapping(inode, file->f_mapping);
res = -ENOMEM;
desc = kzalloc(sizeof(*desc), GFP_KERNEL);
@@ -1096,20 +1235,30 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
goto out;
desc->file = file;
desc->ctx = ctx;
- desc->plus = nfs_use_readdirplus(inode, ctx);
+ desc->page_index_max = -1;
spin_lock(&file->f_lock);
desc->dir_cookie = dir_ctx->dir_cookie;
- desc->dup_cookie = dir_ctx->dup_cookie;
- desc->duped = dir_ctx->duped;
- page_index = dir_ctx->page_index;
+ desc->page_index = dir_ctx->page_index;
+ desc->last_cookie = dir_ctx->last_cookie;
desc->attr_gencount = dir_ctx->attr_gencount;
+ desc->eof = dir_ctx->eof;
+ nfs_set_dtsize(desc, dir_ctx->dtsize);
memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf));
+ cache_hits = atomic_xchg(&dir_ctx->cache_hits, 0);
+ cache_misses = atomic_xchg(&dir_ctx->cache_misses, 0);
+ force_clear = dir_ctx->force_clear;
spin_unlock(&file->f_lock);
- if (test_and_clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags) &&
- list_is_singular(&nfsi->open_files))
- invalidate_mapping_pages(inode->i_mapping, page_index + 1, -1);
+ if (desc->eof) {
+ res = 0;
+ goto out_free;
+ }
+
+ desc->plus = nfs_use_readdirplus(inode, ctx, cache_hits, cache_misses);
+ force_clear = nfs_readdir_handle_cache_misses(inode, desc, cache_misses,
+ force_clear);
+ desc->clear_cache = force_clear;
do {
res = readdir_search_pagecache(desc);
@@ -1128,9 +1277,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
break;
}
if (res == -ETOOSMALL && desc->plus) {
- clear_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
nfs_zap_caches(inode);
- desc->page_index = 0;
desc->plus = false;
desc->eof = false;
continue;
@@ -1140,17 +1287,21 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
nfs_do_filldir(desc, nfsi->cookieverf);
nfs_readdir_page_unlock_and_put_cached(desc);
- } while (!desc->eof);
+ if (desc->page_index == desc->page_index_max)
+ desc->clear_cache = force_clear;
+ } while (!desc->eob && !desc->eof);
spin_lock(&file->f_lock);
dir_ctx->dir_cookie = desc->dir_cookie;
- dir_ctx->dup_cookie = desc->dup_cookie;
- dir_ctx->duped = desc->duped;
+ dir_ctx->last_cookie = desc->last_cookie;
dir_ctx->attr_gencount = desc->attr_gencount;
dir_ctx->page_index = desc->page_index;
+ dir_ctx->force_clear = force_clear;
+ dir_ctx->eof = desc->eof;
+ dir_ctx->dtsize = desc->dtsize;
memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf));
spin_unlock(&file->f_lock);
-
+out_free:
kfree(desc);
out:
@@ -1185,13 +1336,15 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
}
if (offset != filp->f_pos) {
filp->f_pos = offset;
- if (nfs_readdir_use_cookie(filp))
- dir_ctx->dir_cookie = offset;
- else
+ dir_ctx->page_index = 0;
+ if (!nfs_readdir_use_cookie(filp)) {
dir_ctx->dir_cookie = 0;
- if (offset == 0)
- memset(dir_ctx->verf, 0, sizeof(dir_ctx->verf));
- dir_ctx->duped = 0;
+ dir_ctx->last_cookie = 0;
+ } else {
+ dir_ctx->dir_cookie = offset;
+ dir_ctx->last_cookie = offset;
+ }
+ dir_ctx->eof = false;
}
spin_unlock(&filp->f_lock);
return offset;
@@ -1324,6 +1477,14 @@ void nfs_clear_verifier_delegated(struct inode *inode)
EXPORT_SYMBOL_GPL(nfs_clear_verifier_delegated);
#endif /* IS_ENABLED(CONFIG_NFS_V4) */
+static int nfs_dentry_verify_change(struct inode *dir, struct dentry *dentry)
+{
+ if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE) &&
+ d_really_is_negative(dentry))
+ return dentry->d_time == inode_peek_iversion_raw(dir);
+ return nfs_verify_change_attribute(dir, dentry->d_time);
+}
+
/*
* A check for whether or not the parent directory has changed.
* In the case it has, we assume that the dentries are untrustworthy
@@ -1337,7 +1498,7 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry,
return 1;
if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE)
return 0;
- if (!nfs_verify_change_attribute(dir, dentry->d_time))
+ if (!nfs_dentry_verify_change(dir, dentry))
return 0;
/* Revalidate nfsi->cache_change_attribute before we declare a match */
if (nfs_mapping_need_revalidate_inode(dir)) {
@@ -1346,7 +1507,7 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry,
if (__nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0)
return 0;
}
- if (!nfs_verify_change_attribute(dir, dentry->d_time))
+ if (!nfs_dentry_verify_change(dir, dentry))
return 0;
return 1;
}
@@ -1398,7 +1559,12 @@ int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags)
if (flags & LOOKUP_REVAL)
goto out_force;
out:
- return (inode->i_nlink == 0) ? -ESTALE : 0;
+ if (inode->i_nlink > 0 ||
+ (inode->i_nlink == 0 &&
+ test_bit(NFS_INO_PRESERVE_UNLINKED, &NFS_I(inode)->flags)))
+ return 0;
+ else
+ return -ESTALE;
out_force:
if (flags & LOOKUP_RCU)
return -ECHILD;
@@ -1436,6 +1602,9 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
return 0;
if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG)
return 1;
+ /* Case insensitive server? Revalidate negative dentries */
+ if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
+ return 1;
return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU);
}
@@ -1445,9 +1614,7 @@ nfs_lookup_revalidate_done(struct inode *dir, struct dentry *dentry,
{
switch (error) {
case 1:
- dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n",
- __func__, dentry);
- return 1;
+ break;
case 0:
/*
* We can't d_drop the root of a disconnected tree:
@@ -1456,13 +1623,10 @@ nfs_lookup_revalidate_done(struct inode *dir, struct dentry *dentry,
* inodes on unmount and further oopses.
*/
if (inode && IS_ROOT(dentry))
- return 1;
- dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n",
- __func__, dentry);
- return 0;
+ error = 1;
+ break;
}
- dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n",
- __func__, dentry, error);
+ trace_nfs_lookup_revalidate_exit(dir, dentry, 0, error);
return error;
}
@@ -1487,15 +1651,17 @@ nfs_lookup_revalidate_delegated(struct inode *dir, struct dentry *dentry,
return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
}
-static int
-nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry,
- struct inode *inode)
+static int nfs_lookup_revalidate_dentry(struct inode *dir,
+ struct dentry *dentry,
+ struct inode *inode, unsigned int flags)
{
struct nfs_fh *fhandle;
struct nfs_fattr *fattr;
unsigned long dir_verifier;
int ret;
+ trace_nfs_lookup_revalidate_enter(dir, dentry, flags);
+
ret = -ENOMEM;
fhandle = nfs_alloc_fhandle();
fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode));
@@ -1516,6 +1682,10 @@ nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry,
}
goto out;
}
+
+ /* Request help from readdirplus */
+ nfs_lookup_advise_force_readdirplus(dir, flags);
+
ret = 0;
if (nfs_compare_fh(NFS_FH(inode), fhandle))
goto out;
@@ -1525,8 +1695,6 @@ nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry,
nfs_setsecurity(inode, fattr);
nfs_set_verifier(dentry, dir_verifier);
- /* set a readdirplus hint that we had a cache miss */
- nfs_force_use_readdirplus(dir);
ret = 1;
out:
nfs_free_fattr(fattr);
@@ -1536,7 +1704,7 @@ out:
* If the lookup failed despite the dentry change attribute being
* a match, then we should revalidate the directory cache.
*/
- if (!ret && nfs_verify_change_attribute(dir, dentry->d_time))
+ if (!ret && nfs_dentry_verify_change(dir, dentry))
nfs_mark_dir_for_revalidate(dir);
return nfs_lookup_revalidate_done(dir, dentry, inode, ret);
}
@@ -1571,6 +1739,10 @@ nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
goto out_bad;
}
+ if ((flags & LOOKUP_RENAME_TARGET) && d_count(dentry) < 2 &&
+ nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
+ goto out_bad;
+
if (nfs_verifier_is_delegated(dentry))
return nfs_lookup_revalidate_delegated(dir, dentry, inode);
@@ -1583,7 +1755,6 @@ nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
nfs_mark_dir_for_revalidate(dir);
goto out_bad;
}
- nfs_advise_use_readdirplus(dir);
goto out_valid;
}
@@ -1593,10 +1764,7 @@ nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
if (NFS_STALE(inode))
goto out_bad;
- trace_nfs_lookup_revalidate_enter(dir, dentry, flags);
- error = nfs_lookup_revalidate_dentry(dir, dentry, inode);
- trace_nfs_lookup_revalidate_exit(dir, dentry, flags, error);
- return error;
+ return nfs_lookup_revalidate_dentry(dir, dentry, inode, flags);
out_valid:
return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
out_bad:
@@ -1614,6 +1782,8 @@ __nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
int ret;
if (flags & LOOKUP_RCU) {
+ if (dentry->d_fsdata == NFS_FSDATA_BLOCKED)
+ return -ECHILD;
parent = READ_ONCE(dentry->d_parent);
dir = d_inode_rcu(parent);
if (!dir)
@@ -1622,6 +1792,9 @@ __nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
if (parent != READ_ONCE(dentry->d_parent))
return -ECHILD;
} else {
+ /* Wait for unlink to complete */
+ wait_var_event(&dentry->d_fsdata,
+ dentry->d_fsdata != NFS_FSDATA_BLOCKED);
parent = dget_parent(dentry);
ret = reval(d_inode(parent), dentry, flags);
dput(parent);
@@ -1775,8 +1948,11 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
dir_verifier = nfs_save_change_attribute(dir);
trace_nfs_lookup_enter(dir, dentry, flags);
error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr);
- if (error == -ENOENT)
+ if (error == -ENOENT) {
+ if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
+ dir_verifier = inode_peek_iversion_raw(dir);
goto no_entry;
+ }
if (error < 0) {
res = ERR_PTR(error);
goto out;
@@ -1787,7 +1963,7 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
goto out;
/* Notify readdir to use READDIRPLUS */
- nfs_force_use_readdirplus(dir);
+ nfs_lookup_advise_force_readdirplus(dir, flags);
no_entry:
res = d_splice_alias(inode, dentry);
@@ -1805,6 +1981,14 @@ out:
}
EXPORT_SYMBOL_GPL(nfs_lookup);
+void nfs_d_prune_case_insensitive_aliases(struct inode *inode)
+{
+ /* Case insensitive server? Revalidate dentries */
+ if (inode && nfs_server_capable(inode, NFS_CAP_CASE_INSENSITIVE))
+ d_prune_aliases(inode);
+}
+EXPORT_SYMBOL_GPL(nfs_d_prune_case_insensitive_aliases);
+
#if IS_ENABLED(CONFIG_NFS_V4)
static int nfs4_lookup_revalidate(struct dentry *, unsigned int);
@@ -1818,16 +2002,6 @@ const struct dentry_operations nfs4_dentry_operations = {
};
EXPORT_SYMBOL_GPL(nfs4_dentry_operations);
-static fmode_t flags_to_mode(int flags)
-{
- fmode_t res = (__force fmode_t)flags & FMODE_EXEC;
- if ((flags & O_ACCMODE) != O_WRONLY)
- res |= FMODE_READ;
- if ((flags & O_ACCMODE) != O_RDONLY)
- res |= FMODE_WRITE;
- return res;
-}
-
static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags, struct file *filp)
{
return alloc_nfs_open_context(dentry, flags_to_mode(open_flags), filp);
@@ -1848,7 +2022,7 @@ static int nfs_finish_open(struct nfs_open_context *ctx,
err = finish_open(file, dentry, do_open);
if (err)
goto out;
- if (S_ISREG(file->f_path.dentry->d_inode->i_mode))
+ if (S_ISREG(file_inode(file)->i_mode))
nfs_file_set_open_context(file, ctx);
else
err = -EOPENSTALE;
@@ -1866,6 +2040,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
struct iattr attr = { .ia_valid = ATTR_OPEN };
struct inode *inode;
unsigned int lookup_flags = 0;
+ unsigned long dir_verifier;
bool switched = false;
int created = 0;
int err;
@@ -1939,7 +2114,11 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
switch (err) {
case -ENOENT:
d_splice_alias(NULL, dentry);
- nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+ if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
+ dir_verifier = inode_peek_iversion_raw(dir);
+ else
+ dir_verifier = nfs_save_change_attribute(dir);
+ nfs_set_verifier(dentry, dir_verifier);
break;
case -EISDIR:
case -ENOTDIR:
@@ -1954,6 +2133,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
}
goto out;
}
+ file->f_mode |= FMODE_CAN_ODIRECT;
err = nfs_finish_open(ctx, ctx->dentry, file, open_flags);
trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
@@ -1967,6 +2147,24 @@ out:
no_open:
res = nfs_lookup(dir, dentry, lookup_flags);
+ if (!res) {
+ inode = d_inode(dentry);
+ if ((lookup_flags & LOOKUP_DIRECTORY) && inode &&
+ !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)))
+ res = ERR_PTR(-ENOTDIR);
+ else if (inode && S_ISREG(inode->i_mode))
+ res = ERR_PTR(-EOPENSTALE);
+ } else if (!IS_ERR(res)) {
+ inode = d_inode(res);
+ if ((lookup_flags & LOOKUP_DIRECTORY) && inode &&
+ !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) {
+ dput(res);
+ res = ERR_PTR(-ENOTDIR);
+ } else if (inode && S_ISREG(inode->i_mode)) {
+ dput(res);
+ res = ERR_PTR(-EOPENSTALE);
+ }
+ }
if (switched) {
d_lookup_done(dentry);
if (!res)
@@ -2019,7 +2217,7 @@ nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
reval_dentry:
if (flags & LOOKUP_RCU)
return -ECHILD;
- return nfs_lookup_revalidate_dentry(dir, dentry, inode);
+ return nfs_lookup_revalidate_dentry(dir, dentry, inode, flags);
full_reval:
return nfs_do_lookup_revalidate(dir, dentry, flags);
@@ -2184,9 +2382,12 @@ static void nfs_dentry_remove_handle_error(struct inode *dir,
{
switch (error) {
case -ENOENT:
- d_delete(dentry);
- fallthrough;
+ if (d_really_is_positive(dentry))
+ d_delete(dentry);
+ nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+ break;
case 0:
+ nfs_d_prune_case_insensitive_aliases(d_inode(dentry));
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
}
}
@@ -2263,29 +2464,40 @@ out:
int nfs_unlink(struct inode *dir, struct dentry *dentry)
{
int error;
- int need_rehash = 0;
dfprintk(VFS, "NFS: unlink(%s/%lu, %pd)\n", dir->i_sb->s_id,
dir->i_ino, dentry);
trace_nfs_unlink_enter(dir, dentry);
spin_lock(&dentry->d_lock);
- if (d_count(dentry) > 1) {
+ if (d_count(dentry) > 1 && !test_bit(NFS_INO_PRESERVE_UNLINKED,
+ &NFS_I(d_inode(dentry))->flags)) {
spin_unlock(&dentry->d_lock);
/* Start asynchronous writeout of the inode */
write_inode_now(d_inode(dentry), 0);
error = nfs_sillyrename(dir, dentry);
goto out;
}
- if (!d_unhashed(dentry)) {
- __d_drop(dentry);
- need_rehash = 1;
+ /* We must prevent any concurrent open until the unlink
+ * completes. ->d_revalidate will wait for ->d_fsdata
+ * to clear. We set it here to ensure no lookup succeeds until
+ * the unlink is complete on the server.
+ */
+ error = -ETXTBSY;
+ if (WARN_ON(dentry->d_flags & DCACHE_NFSFS_RENAMED) ||
+ WARN_ON(dentry->d_fsdata == NFS_FSDATA_BLOCKED)) {
+ spin_unlock(&dentry->d_lock);
+ goto out;
}
+ /* old devname */
+ kfree(dentry->d_fsdata);
+ dentry->d_fsdata = NFS_FSDATA_BLOCKED;
+
spin_unlock(&dentry->d_lock);
error = nfs_safe_remove(dentry);
nfs_dentry_remove_handle_error(dir, dentry, error);
- if (need_rehash)
- d_rehash(dentry);
+ dentry->d_fsdata = NULL;
+ wake_up_var(&dentry->d_fsdata);
out:
trace_nfs_unlink_exit(dir, dentry, error);
return error;
@@ -2379,6 +2591,8 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
trace_nfs_link_enter(inode, dir, dentry);
d_drop(dentry);
+ if (S_ISREG(inode->i_mode))
+ nfs_sync_inode(inode);
error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
if (error == 0) {
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
@@ -2390,6 +2604,15 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
}
EXPORT_SYMBOL_GPL(nfs_link);
+static void
+nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data)
+{
+ struct dentry *new_dentry = data->new_dentry;
+
+ new_dentry->d_fsdata = NULL;
+ wake_up_var(&new_dentry->d_fsdata);
+}
+
/*
* RENAME
* FIXME: Some nfsds, like the Linux user space nfsd, may generate a
@@ -2420,8 +2643,9 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
{
struct inode *old_inode = d_inode(old_dentry);
struct inode *new_inode = d_inode(new_dentry);
- struct dentry *dentry = NULL, *rehash = NULL;
+ struct dentry *dentry = NULL;
struct rpc_task *task;
+ bool must_unblock = false;
int error = -EBUSY;
if (flags)
@@ -2439,18 +2663,27 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
* the new target.
*/
if (new_inode && !S_ISDIR(new_inode->i_mode)) {
- /*
- * To prevent any new references to the target during the
- * rename, we unhash the dentry in advance.
+ /* We must prevent any concurrent open until the unlink
+ * completes. ->d_revalidate will wait for ->d_fsdata
+ * to clear. We set it here to ensure no lookup succeeds until
+ * the unlink is complete on the server.
*/
- if (!d_unhashed(new_dentry)) {
- d_drop(new_dentry);
- rehash = new_dentry;
+ error = -ETXTBSY;
+ if (WARN_ON(new_dentry->d_flags & DCACHE_NFSFS_RENAMED) ||
+ WARN_ON(new_dentry->d_fsdata == NFS_FSDATA_BLOCKED))
+ goto out;
+ if (new_dentry->d_fsdata) {
+ /* old devname */
+ kfree(new_dentry->d_fsdata);
+ new_dentry->d_fsdata = NULL;
}
+ spin_lock(&new_dentry->d_lock);
if (d_count(new_dentry) > 2) {
int err;
+ spin_unlock(&new_dentry->d_lock);
+
/* copy the target dentry's name */
dentry = d_alloc(new_dentry->d_parent,
&new_dentry->d_name);
@@ -2463,12 +2696,19 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
goto out;
new_dentry = dentry;
- rehash = NULL;
new_inode = NULL;
+ } else {
+ new_dentry->d_fsdata = NFS_FSDATA_BLOCKED;
+ must_unblock = true;
+ spin_unlock(&new_dentry->d_lock);
}
+
}
- task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL);
+ if (S_ISREG(old_inode->i_mode))
+ nfs_sync_inode(old_inode);
+ task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry,
+ must_unblock ? nfs_unblock_rename : NULL);
if (IS_ERR(task)) {
error = PTR_ERR(task);
goto out;
@@ -2492,8 +2732,6 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
spin_unlock(&old_inode->i_lock);
}
out:
- if (rehash)
- d_rehash(rehash);
trace_nfs_rename_exit(old_dir, old_dentry,
new_dir, new_dentry, error);
if (!error) {
@@ -2528,7 +2766,7 @@ MODULE_PARM_DESC(nfs_access_max_cachesize, "NFS access maximum total cache lengt
static void nfs_access_free_entry(struct nfs_access_entry *entry)
{
- put_cred(entry->cred);
+ put_group_info(entry->group_info);
kfree_rcu(entry, rcu_head);
smp_mb__before_atomic();
atomic_long_dec(&nfs_access_nr_entries);
@@ -2654,6 +2892,43 @@ void nfs_access_zap_cache(struct inode *inode)
}
EXPORT_SYMBOL_GPL(nfs_access_zap_cache);
+static int access_cmp(const struct cred *a, const struct nfs_access_entry *b)
+{
+ struct group_info *ga, *gb;
+ int g;
+
+ if (uid_lt(a->fsuid, b->fsuid))
+ return -1;
+ if (uid_gt(a->fsuid, b->fsuid))
+ return 1;
+
+ if (gid_lt(a->fsgid, b->fsgid))
+ return -1;
+ if (gid_gt(a->fsgid, b->fsgid))
+ return 1;
+
+ ga = a->group_info;
+ gb = b->group_info;
+ if (ga == gb)
+ return 0;
+ if (ga == NULL)
+ return -1;
+ if (gb == NULL)
+ return 1;
+ if (ga->ngroups < gb->ngroups)
+ return -1;
+ if (ga->ngroups > gb->ngroups)
+ return 1;
+
+ for (g = 0; g < ga->ngroups; g++) {
+ if (gid_lt(ga->gid[g], gb->gid[g]))
+ return -1;
+ if (gid_gt(ga->gid[g], gb->gid[g]))
+ return 1;
+ }
+ return 0;
+}
+
static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, const struct cred *cred)
{
struct rb_node *n = NFS_I(inode)->access_cache.rb_node;
@@ -2661,7 +2936,7 @@ static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, co
while (n != NULL) {
struct nfs_access_entry *entry =
rb_entry(n, struct nfs_access_entry, rb_node);
- int cmp = cred_fscmp(cred, entry->cred);
+ int cmp = access_cmp(cred, entry);
if (cmp < 0)
n = n->rb_left;
@@ -2673,7 +2948,7 @@ static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, co
return NULL;
}
-static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res, bool may_block)
+static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *cred, u32 *mask, bool may_block)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_access_entry *cache;
@@ -2703,8 +2978,7 @@ static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *
spin_lock(&inode->i_lock);
retry = false;
}
- res->cred = cache->cred;
- res->mask = cache->mask;
+ *mask = cache->mask;
list_move_tail(&cache->lru, &nfsi->access_cache_entry_lru);
err = 0;
out:
@@ -2716,7 +2990,7 @@ out_zap:
return -ENOENT;
}
-static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res)
+static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cred, u32 *mask)
{
/* Only check the most recently returned cache entry,
* but do it without locking.
@@ -2732,35 +3006,36 @@ static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cre
lh = rcu_dereference(list_tail_rcu(&nfsi->access_cache_entry_lru));
cache = list_entry(lh, struct nfs_access_entry, lru);
if (lh == &nfsi->access_cache_entry_lru ||
- cred_fscmp(cred, cache->cred) != 0)
+ access_cmp(cred, cache) != 0)
cache = NULL;
if (cache == NULL)
goto out;
if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS))
goto out;
- res->cred = cache->cred;
- res->mask = cache->mask;
+ *mask = cache->mask;
err = 0;
out:
rcu_read_unlock();
return err;
}
-int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct
-nfs_access_entry *res, bool may_block)
+int nfs_access_get_cached(struct inode *inode, const struct cred *cred,
+ u32 *mask, bool may_block)
{
int status;
- status = nfs_access_get_cached_rcu(inode, cred, res);
+ status = nfs_access_get_cached_rcu(inode, cred, mask);
if (status != 0)
- status = nfs_access_get_cached_locked(inode, cred, res,
+ status = nfs_access_get_cached_locked(inode, cred, mask,
may_block);
return status;
}
EXPORT_SYMBOL_GPL(nfs_access_get_cached);
-static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set)
+static void nfs_access_add_rbtree(struct inode *inode,
+ struct nfs_access_entry *set,
+ const struct cred *cred)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct rb_root *root_node = &nfsi->access_cache;
@@ -2773,7 +3048,7 @@ static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *
while (*p != NULL) {
parent = *p;
entry = rb_entry(parent, struct nfs_access_entry, rb_node);
- cmp = cred_fscmp(set->cred, entry->cred);
+ cmp = access_cmp(cred, entry);
if (cmp < 0)
p = &parent->rb_left;
@@ -2795,13 +3070,16 @@ found:
nfs_access_free_entry(entry);
}
-void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
+void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set,
+ const struct cred *cred)
{
struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL);
if (cache == NULL)
return;
RB_CLEAR_NODE(&cache->rb_node);
- cache->cred = get_cred(set->cred);
+ cache->fsuid = cred->fsuid;
+ cache->fsgid = cred->fsgid;
+ cache->group_info = get_group_info(cred->group_info);
cache->mask = set->mask;
/* The above field assignments must be visible
@@ -2809,7 +3087,7 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
* use rcu_assign_pointer, so just force the memory barrier.
*/
smp_wmb();
- nfs_access_add_rbtree(inode, cache);
+ nfs_access_add_rbtree(inode, cache, cred);
/* Update accounting */
smp_mb__before_atomic();
@@ -2874,7 +3152,7 @@ static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask)
trace_nfs_access_enter(inode);
- status = nfs_access_get_cached(inode, cred, &cache, may_block);
+ status = nfs_access_get_cached(inode, cred, &cache.mask, may_block);
if (status == 0)
goto out_cached;
@@ -2885,17 +3163,13 @@ static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask)
/*
* Determine which access bits we want to ask for...
*/
- cache.mask = NFS_ACCESS_READ | NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND;
- if (nfs_server_capable(inode, NFS_CAP_XATTR)) {
- cache.mask |= NFS_ACCESS_XAREAD | NFS_ACCESS_XAWRITE |
- NFS_ACCESS_XALIST;
- }
+ cache.mask = NFS_ACCESS_READ | NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND |
+ nfs_access_xattr_mask(NFS_SERVER(inode));
if (S_ISDIR(inode->i_mode))
cache.mask |= NFS_ACCESS_DELETE | NFS_ACCESS_LOOKUP;
else
cache.mask |= NFS_ACCESS_EXECUTE;
- cache.cred = cred;
- status = NFS_PROTO(inode)->access(inode, &cache);
+ status = NFS_PROTO(inode)->access(inode, &cache, cred);
if (status != 0) {
if (status == -ESTALE) {
if (!S_ISDIR(inode->i_mode))
@@ -2905,7 +3179,7 @@ static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask)
}
goto out;
}
- nfs_access_add_cache(inode, &cache);
+ nfs_access_add_cache(inode, &cache, cred);
out_cached:
cache_mask = nfs_access_calc_mask(cache.mask, inode->i_mode);
if ((mask & ~cache_mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0)
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 9cff8709c80a..1707f46b1335 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -59,44 +59,13 @@
#include "internal.h"
#include "iostat.h"
#include "pnfs.h"
+#include "fscache.h"
+#include "nfstrace.h"
#define NFSDBG_FACILITY NFSDBG_VFS
static struct kmem_cache *nfs_direct_cachep;
-struct nfs_direct_req {
- struct kref kref; /* release manager */
-
- /* I/O parameters */
- struct nfs_open_context *ctx; /* file open context info */
- struct nfs_lock_context *l_ctx; /* Lock context info */
- struct kiocb * iocb; /* controlling i/o request */
- struct inode * inode; /* target file of i/o */
-
- /* completion state */
- atomic_t io_count; /* i/os we're waiting for */
- spinlock_t lock; /* protect completion state */
-
- loff_t io_start; /* Start offset for I/O */
- ssize_t count, /* bytes actually processed */
- max_count, /* max expected count */
- bytes_left, /* bytes left to be sent */
- error; /* any reported error */
- struct completion completion; /* wait for i/o completion */
-
- /* commit state */
- struct nfs_mds_commit_info mds_cinfo; /* Storage for cinfo */
- struct pnfs_ds_commit_info ds_cinfo; /* Storage for cinfo */
- struct work_struct work;
- int flags;
- /* for write */
-#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */
-#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */
- /* for read */
-#define NFS_ODIRECT_SHOULD_DIRTY (3) /* dirty user-space page after read */
-#define NFS_ODIRECT_DONE INT_MAX /* write verification failed */
-};
-
static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops;
static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops;
static void nfs_direct_write_complete(struct nfs_direct_req *dreq);
@@ -152,28 +121,25 @@ nfs_direct_count_bytes(struct nfs_direct_req *dreq,
}
/**
- * nfs_direct_IO - NFS address space operation for direct I/O
+ * nfs_swap_rw - NFS address space operation for swap I/O
* @iocb: target I/O control block
* @iter: I/O buffer
*
- * The presence of this routine in the address space ops vector means
- * the NFS client supports direct I/O. However, for most direct IO, we
- * shunt off direct read and write requests before the VFS gets them,
- * so this method is only ever called for swap.
+ * Perform IO to the swap-file. This is much like direct IO.
*/
-ssize_t nfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
+int nfs_swap_rw(struct kiocb *iocb, struct iov_iter *iter)
{
- struct inode *inode = iocb->ki_filp->f_mapping->host;
-
- /* we only support swap file calling nfs_direct_IO */
- if (!IS_SWAPFILE(inode))
- return 0;
+ ssize_t ret;
VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE);
if (iov_iter_rw(iter) == READ)
- return nfs_file_direct_read(iocb, iter);
- return nfs_file_direct_write(iocb, iter);
+ ret = nfs_file_direct_read(iocb, iter, true);
+ else
+ ret = nfs_file_direct_write(iocb, iter, true);
+ if (ret < 0)
+ return ret;
+ return 0;
}
static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
@@ -366,13 +332,12 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
size_t pgbase;
unsigned npages, i;
- result = iov_iter_get_pages_alloc(iter, &pagevec,
+ result = iov_iter_get_pages_alloc2(iter, &pagevec,
rsize, &pgbase);
if (result < 0)
break;
bytes = result;
- iov_iter_advance(iter, bytes);
npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
for (i = 0; i < npages; i++) {
struct nfs_page *req;
@@ -424,6 +389,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
* nfs_file_direct_read - file direct read operation for NFS files
* @iocb: target I/O control block
* @iter: vector of user buffers into which to read data
+ * @swap: flag indicating this is swap IO, not O_DIRECT IO
*
* We use this function for direct reads instead of calling
* generic_file_aio_read() in order to avoid gfar's check to see if
@@ -439,7 +405,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
* client must read the updated atime from the server back into its
* cache.
*/
-ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter)
+ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
+ bool swap)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
@@ -478,15 +445,17 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter)
if (!is_sync_kiocb(iocb))
dreq->iocb = iocb;
- if (iter_is_iovec(iter))
+ if (user_backed_iter(iter))
dreq->flags = NFS_ODIRECT_SHOULD_DIRTY;
- nfs_start_io_direct(inode);
+ if (!swap)
+ nfs_start_io_direct(inode);
NFS_I(inode)->read_io += count;
requested = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos);
- nfs_end_io_direct(inode);
+ if (!swap)
+ nfs_end_io_direct(inode);
if (requested > 0) {
result = nfs_direct_wait(dreq);
@@ -593,14 +562,17 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
struct nfs_page *req;
int status = data->task.tk_status;
+ trace_nfs_direct_commit_complete(dreq);
+
if (status < 0) {
/* Errors in commit are fatal */
dreq->error = status;
dreq->max_count = 0;
dreq->count = 0;
dreq->flags = NFS_ODIRECT_DONE;
- } else if (dreq->flags == NFS_ODIRECT_DONE)
+ } else {
status = dreq->error;
+ }
nfs_init_cinfo_from_dreq(&cinfo, dreq);
@@ -629,6 +601,8 @@ static void nfs_direct_resched_write(struct nfs_commit_info *cinfo,
{
struct nfs_direct_req *dreq = cinfo->dreq;
+ trace_nfs_direct_resched_write(dreq);
+
spin_lock(&dreq->lock);
if (dreq->flags != NFS_ODIRECT_DONE)
dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
@@ -693,6 +667,7 @@ static void nfs_direct_write_schedule_work(struct work_struct *work)
static void nfs_direct_write_complete(struct nfs_direct_req *dreq)
{
+ trace_nfs_direct_write_complete(dreq);
queue_work(nfsiod_workqueue, &dreq->work); /* Calls nfs_direct_write_schedule_work */
}
@@ -703,6 +678,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
struct nfs_page *req = nfs_list_entry(hdr->pages.next);
int flags = NFS_ODIRECT_DONE;
+ trace_nfs_direct_write_completion(dreq);
+
nfs_init_cinfo_from_dreq(&cinfo, dreq);
spin_lock(&dreq->lock);
@@ -712,7 +689,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
}
nfs_direct_count_bytes(dreq, hdr);
- if (hdr->good_bytes != 0 && nfs_write_need_commit(hdr)) {
+ if (test_bit(NFS_IOHDR_UNSTABLE_WRITES, &hdr->flags)) {
if (!dreq->flags)
dreq->flags = NFS_ODIRECT_DO_COMMIT;
flags = dreq->flags;
@@ -757,6 +734,8 @@ static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr)
{
struct nfs_direct_req *dreq = hdr->dreq;
+ trace_nfs_direct_write_reschedule_io(dreq);
+
spin_lock(&dreq->lock);
if (dreq->error == 0) {
dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
@@ -789,7 +768,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
*/
static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
struct iov_iter *iter,
- loff_t pos)
+ loff_t pos, int ioflags)
{
struct nfs_pageio_descriptor desc;
struct inode *inode = dreq->inode;
@@ -797,7 +776,9 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
size_t requested_bytes = 0;
size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE);
- nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false,
+ trace_nfs_direct_write_schedule_iovec(dreq);
+
+ nfs_pageio_init_write(&desc, inode, ioflags, false,
&nfs_direct_write_completion_ops);
desc.pg_dreq = dreq;
get_dreq(dreq);
@@ -810,13 +791,12 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
size_t pgbase;
unsigned npages, i;
- result = iov_iter_get_pages_alloc(iter, &pagevec,
+ result = iov_iter_get_pages_alloc2(iter, &pagevec,
wsize, &pgbase);
if (result < 0)
break;
bytes = result;
- iov_iter_advance(iter, bytes);
npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
for (i = 0; i < npages; i++) {
struct nfs_page *req;
@@ -875,6 +855,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
* nfs_file_direct_write - file direct write operation for NFS files
* @iocb: target I/O control block
* @iter: vector of user buffers from which to write data
+ * @swap: flag indicating this is swap IO, not O_DIRECT IO
*
* We use this function for direct writes instead of calling
* generic_file_aio_write() in order to avoid taking the inode
@@ -891,7 +872,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
* Note that O_APPEND is not supported for NFS direct writes, as there
* is no atomic O_APPEND write facility in the NFS protocol.
*/
-ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
+ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
+ bool swap)
{
ssize_t result, requested;
size_t count;
@@ -905,7 +887,11 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n",
file, iov_iter_count(iter), (long long) iocb->ki_pos);
- result = generic_write_checks(iocb, iter);
+ if (swap)
+ /* bypass generic checks */
+ result = iov_iter_count(iter);
+ else
+ result = generic_write_checks(iocb, iter);
if (result <= 0)
return result;
count = result;
@@ -936,16 +922,22 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
dreq->iocb = iocb;
pnfs_init_ds_commit_info_ops(&dreq->ds_cinfo, inode);
- nfs_start_io_direct(inode);
+ if (swap) {
+ requested = nfs_direct_write_schedule_iovec(dreq, iter, pos,
+ FLUSH_STABLE);
+ } else {
+ nfs_start_io_direct(inode);
- requested = nfs_direct_write_schedule_iovec(dreq, iter, pos);
+ requested = nfs_direct_write_schedule_iovec(dreq, iter, pos,
+ FLUSH_COND_STABLE);
- if (mapping->nrpages) {
- invalidate_inode_pages2_range(mapping,
- pos >> PAGE_SHIFT, end);
- }
+ if (mapping->nrpages) {
+ invalidate_inode_pages2_range(mapping,
+ pos >> PAGE_SHIFT, end);
+ }
- nfs_end_io_direct(inode);
+ nfs_end_io_direct(inode);
+ }
if (requested > 0) {
result = nfs_direct_wait(dreq);
@@ -959,6 +951,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
} else {
result = requested;
}
+ nfs_fscache_invalidate(inode, FSCACHE_INVAL_DIO_WRITE);
out_release:
nfs_direct_req_release(dreq);
out:
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index e87d500ad95a..6603b5cee029 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -16,8 +16,9 @@
#include "dns_resolve.h"
ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen,
- struct sockaddr *sa, size_t salen)
+ struct sockaddr_storage *ss, size_t salen)
{
+ struct sockaddr *sa = (struct sockaddr *)ss;
ssize_t ret;
char *ip_addr = NULL;
int ip_len;
@@ -341,7 +342,7 @@ out:
}
ssize_t nfs_dns_resolve_name(struct net *net, char *name,
- size_t namelen, struct sockaddr *sa, size_t salen)
+ size_t namelen, struct sockaddr_storage *ss, size_t salen)
{
struct nfs_dns_ent key = {
.hostname = name,
@@ -354,7 +355,7 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name,
ret = do_cache_lookup_wait(nn->nfs_dns_resolve, &key, &item);
if (ret == 0) {
if (salen >= item->addrlen) {
- memcpy(sa, &item->addr, item->addrlen);
+ memcpy(ss, &item->addr, item->addrlen);
ret = item->addrlen;
} else
ret = -EOVERFLOW;
diff --git a/fs/nfs/dns_resolve.h b/fs/nfs/dns_resolve.h
index 576ff4b54c82..fe3b172c4de1 100644
--- a/fs/nfs/dns_resolve.h
+++ b/fs/nfs/dns_resolve.h
@@ -32,6 +32,6 @@ extern void nfs_dns_resolver_cache_destroy(struct net *net);
#endif
extern ssize_t nfs_dns_resolve_name(struct net *net, char *name,
- size_t namelen, struct sockaddr *sa, size_t salen);
+ size_t namelen, struct sockaddr_storage *sa, size_t salen);
#endif
diff --git a/fs/nfs/export.c b/fs/nfs/export.c
index 171c424cb6d5..01596f2d0a1e 100644
--- a/fs/nfs/export.c
+++ b/fs/nfs/export.c
@@ -158,5 +158,5 @@ const struct export_operations nfs_export_ops = {
.fetch_iversion = nfs_fetch_iversion,
.flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK|
EXPORT_OP_CLOSE_BEFORE_UNLINK|EXPORT_OP_REMOTE_FS|
- EXPORT_OP_NOATOMIC_ATTR|EXPORT_OP_SYNC_LOCKS,
+ EXPORT_OP_NOATOMIC_ATTR,
};
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 24e7dccce355..d8ec889a4b3f 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -44,11 +44,6 @@
static const struct vm_operations_struct nfs_file_vm_ops;
-/* Hack for future NFS swap support */
-#ifndef IS_SWAPFILE
-# define IS_SWAPFILE(inode) (0)
-#endif
-
int nfs_check_flags(int flags)
{
if ((flags & (O_APPEND | O_DIRECT)) == (O_APPEND | O_DIRECT))
@@ -74,6 +69,8 @@ nfs_file_open(struct inode *inode, struct file *filp)
return res;
res = nfs_open(inode, filp);
+ if (res == 0)
+ filp->f_mode |= FMODE_CAN_ODIRECT;
return res;
}
@@ -84,6 +81,7 @@ nfs_file_release(struct inode *inode, struct file *filp)
nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
nfs_file_clear_open_context(filp);
+ nfs_fscache_release_file(inode, filp);
return 0;
}
EXPORT_SYMBOL_GPL(nfs_file_release);
@@ -161,7 +159,7 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
ssize_t result;
if (iocb->ki_flags & IOCB_DIRECT)
- return nfs_file_direct_read(iocb, to);
+ return nfs_file_direct_read(iocb, to, false);
dprintk("NFS: read(%pD2, %zu@%lu)\n",
iocb->ki_filp,
@@ -208,22 +206,25 @@ static int
nfs_file_fsync_commit(struct file *file, int datasync)
{
struct inode *inode = file_inode(file);
- int ret;
+ int ret, ret2;
dprintk("NFS: fsync file(%pD2) datasync %d\n", file, datasync);
nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
ret = nfs_commit_inode(inode, FLUSH_SYNC);
- if (ret < 0)
- return ret;
- return file_check_and_advance_wb_err(file);
+ ret2 = file_check_and_advance_wb_err(file);
+ if (ret2 < 0)
+ return ret2;
+ return ret;
}
int
nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
- struct nfs_open_context *ctx = nfs_file_open_context(file);
struct inode *inode = file_inode(file);
+ struct nfs_inode *nfsi = NFS_I(inode);
+ long save_nredirtied = atomic_long_read(&nfsi->redirtied_pages);
+ long nredirtied;
int ret;
trace_nfs_fsync_enter(inode);
@@ -238,15 +239,10 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
ret = pnfs_sync_inode(inode, !!datasync);
if (ret != 0)
break;
- if (!test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags))
+ nredirtied = atomic_long_read(&nfsi->redirtied_pages);
+ if (nredirtied == save_nredirtied)
break;
- /*
- * If nfs_file_fsync_commit detected a server reboot, then
- * resend all dirty pages that might have been covered by
- * the NFS_CONTEXT_RESEND_WRITES flag
- */
- start = 0;
- end = LLONG_MAX;
+ save_nredirtied = nredirtied;
}
trace_nfs_fsync_exit(inode, ret);
@@ -317,7 +313,7 @@ static bool nfs_want_read_modify_write(struct file *file, struct page *page,
* increment the page use counts until he is done with the page.
*/
static int nfs_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags,
+ loff_t pos, unsigned len,
struct page **pagep, void **fsdata)
{
int ret;
@@ -329,7 +325,7 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
file, mapping->host->i_ino, len, (long long) pos);
start:
- page = grab_cache_page_write_begin(mapping, index, flags);
+ page = grab_cache_page_write_begin(mapping, index);
if (!page)
return -ENOMEM;
*pagep = page;
@@ -341,7 +337,7 @@ start:
} else if (!once_thru &&
nfs_want_read_modify_write(file, page, pos, len)) {
once_thru = 1;
- ret = nfs_readpage(file, page);
+ ret = nfs_read_folio(file, page_folio(page));
put_page(page);
if (!ret)
goto start;
@@ -389,11 +385,8 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
return status;
NFS_I(mapping->host)->write_io += copied;
- if (nfs_ctx_key_to_expire(ctx, mapping->host)) {
- status = nfs_wb_all(mapping->host);
- if (status < 0)
- return status;
- }
+ if (nfs_ctx_key_to_expire(ctx, mapping->host))
+ nfs_wb_all(mapping->host);
return copied;
}
@@ -405,49 +398,45 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
* - Called if either PG_private or PG_fscache is set on the page
* - Caller holds page lock
*/
-static void nfs_invalidate_page(struct page *page, unsigned int offset,
- unsigned int length)
+static void nfs_invalidate_folio(struct folio *folio, size_t offset,
+ size_t length)
{
- dfprintk(PAGECACHE, "NFS: invalidate_page(%p, %u, %u)\n",
- page, offset, length);
+ dfprintk(PAGECACHE, "NFS: invalidate_folio(%lu, %zu, %zu)\n",
+ folio->index, offset, length);
- if (offset != 0 || length < PAGE_SIZE)
+ if (offset != 0 || length < folio_size(folio))
return;
/* Cancel any unstarted writes on this page */
- nfs_wb_page_cancel(page_file_mapping(page)->host, page);
-
- nfs_fscache_invalidate_page(page, page->mapping->host);
+ nfs_wb_folio_cancel(folio->mapping->host, folio);
+ folio_wait_fscache(folio);
}
/*
- * Attempt to release the private state associated with a page
- * - Called if either PG_private or PG_fscache is set on the page
- * - Caller holds page lock
- * - Return true (may release page) or false (may not)
+ * Attempt to release the private state associated with a folio
+ * - Called if either private or fscache flags are set on the folio
+ * - Caller holds folio lock
+ * - Return true (may release folio) or false (may not)
*/
-static int nfs_release_page(struct page *page, gfp_t gfp)
+static bool nfs_release_folio(struct folio *folio, gfp_t gfp)
{
- dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
+ dfprintk(PAGECACHE, "NFS: release_folio(%p)\n", folio);
- /* If PagePrivate() is set, then the page is not freeable */
- if (PagePrivate(page))
- return 0;
- return nfs_fscache_release_page(page, gfp);
+ /* If the private flag is set, then the folio is not freeable */
+ if (folio_test_private(folio))
+ return false;
+ return nfs_fscache_release_folio(folio, gfp);
}
-static void nfs_check_dirty_writeback(struct page *page,
+static void nfs_check_dirty_writeback(struct folio *folio,
bool *dirty, bool *writeback)
{
struct nfs_inode *nfsi;
- struct address_space *mapping = page_file_mapping(page);
-
- if (!mapping || PageSwapCache(page))
- return;
+ struct address_space *mapping = folio->mapping;
/*
- * Check if an unstable page is currently being committed and
- * if so, have the VM treat it as if the page is under writeback
- * so it will not block due to pages that will shortly be freeable.
+ * Check if an unstable folio is currently being committed and
+ * if so, have the VM treat it as if the folio is under writeback
+ * so it will not block due to folios that will shortly be freeable.
*/
nfsi = NFS_I(mapping->host);
if (atomic_read(&nfsi->commit_info.rpcs_out)) {
@@ -456,11 +445,11 @@ static void nfs_check_dirty_writeback(struct page *page,
}
/*
- * If PagePrivate() is set, then the page is not freeable and as the
- * inode is not being committed, it's not going to be cleaned in the
- * near future so treat it as dirty
+ * If the private flag is set, then the folio is not freeable
+ * and as the inode is not being committed, it's not going to
+ * be cleaned in the near future so treat it as dirty
*/
- if (PagePrivate(page))
+ if (folio_test_private(folio))
*dirty = true;
}
@@ -472,16 +461,15 @@ static void nfs_check_dirty_writeback(struct page *page,
* - Caller holds page lock
* - Return 0 if successful, -error otherwise
*/
-static int nfs_launder_page(struct page *page)
+static int nfs_launder_folio(struct folio *folio)
{
- struct inode *inode = page_file_mapping(page)->host;
- struct nfs_inode *nfsi = NFS_I(inode);
+ struct inode *inode = folio->mapping->host;
- dfprintk(PAGECACHE, "NFS: launder_page(%ld, %llu)\n",
- inode->i_ino, (long long)page_offset(page));
+ dfprintk(PAGECACHE, "NFS: launder_folio(%ld, %llu)\n",
+ inode->i_ino, folio_pos(folio));
- nfs_fscache_wait_on_page_write(nfsi, page);
- return nfs_wb_page(inode, page);
+ folio_wait_fscache(folio);
+ return nfs_wb_page(inode, &folio->page);
}
static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
@@ -489,8 +477,10 @@ static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
{
unsigned long blocks;
long long isize;
- struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host);
- struct inode *inode = file->f_mapping->host;
+ int ret;
+ struct inode *inode = file_inode(file);
+ struct rpc_clnt *clnt = NFS_CLIENT(inode);
+ struct nfs_client *cl = NFS_SERVER(inode)->nfs_client;
spin_lock(&inode->i_lock);
blocks = inode->i_blocks;
@@ -501,37 +491,52 @@ static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
return -EINVAL;
}
+ ret = rpc_clnt_swap_activate(clnt);
+ if (ret)
+ return ret;
+ ret = add_swap_extent(sis, 0, sis->max, 0);
+ if (ret < 0) {
+ rpc_clnt_swap_deactivate(clnt);
+ return ret;
+ }
+
*span = sis->pages;
- return rpc_clnt_swap_activate(clnt);
+ if (cl->rpc_ops->enable_swap)
+ cl->rpc_ops->enable_swap(inode);
+
+ sis->flags |= SWP_FS_OPS;
+ return ret;
}
static void nfs_swap_deactivate(struct file *file)
{
- struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host);
+ struct inode *inode = file_inode(file);
+ struct rpc_clnt *clnt = NFS_CLIENT(inode);
+ struct nfs_client *cl = NFS_SERVER(inode)->nfs_client;
rpc_clnt_swap_deactivate(clnt);
+ if (cl->rpc_ops->disable_swap)
+ cl->rpc_ops->disable_swap(file_inode(file));
}
const struct address_space_operations nfs_file_aops = {
- .readpage = nfs_readpage,
- .readpages = nfs_readpages,
- .set_page_dirty = __set_page_dirty_nobuffers,
+ .read_folio = nfs_read_folio,
+ .readahead = nfs_readahead,
+ .dirty_folio = filemap_dirty_folio,
.writepage = nfs_writepage,
.writepages = nfs_writepages,
.write_begin = nfs_write_begin,
.write_end = nfs_write_end,
- .invalidatepage = nfs_invalidate_page,
- .releasepage = nfs_release_page,
- .direct_IO = nfs_direct_IO,
-#ifdef CONFIG_MIGRATION
- .migratepage = nfs_migrate_page,
-#endif
- .launder_page = nfs_launder_page,
+ .invalidate_folio = nfs_invalidate_folio,
+ .release_folio = nfs_release_folio,
+ .migrate_folio = nfs_migrate_folio,
+ .launder_folio = nfs_launder_folio,
.is_dirty_writeback = nfs_check_dirty_writeback,
.error_remove_page = generic_error_remove_page,
.swap_activate = nfs_swap_activate,
.swap_deactivate = nfs_swap_deactivate,
+ .swap_rw = nfs_swap_rw,
};
/*
@@ -555,10 +560,15 @@ static vm_fault_t nfs_vm_page_mkwrite(struct vm_fault *vmf)
sb_start_pagefault(inode->i_sb);
/* make sure the cache has finished storing the page */
- nfs_fscache_wait_on_page_write(NFS_I(inode), page);
+ if (PageFsCache(page) &&
+ wait_on_page_fscache_killable(vmf->page) < 0) {
+ ret = VM_FAULT_RETRY;
+ goto out;
+ }
wait_on_bit_action(&NFS_I(inode)->flags, NFS_INO_INVALIDATING,
- nfs_wait_bit_killable, TASK_KILLABLE);
+ nfs_wait_bit_killable,
+ TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
lock_page(page);
mapping = page_file_mapping(page);
@@ -590,18 +600,6 @@ static const struct vm_operations_struct nfs_file_vm_ops = {
.page_mkwrite = nfs_vm_page_mkwrite,
};
-static int nfs_need_check_write(struct file *filp, struct inode *inode,
- int error)
-{
- struct nfs_open_context *ctx;
-
- ctx = nfs_file_open_context(filp);
- if (nfs_error_is_fatal_on_server(error) ||
- nfs_ctx_key_to_expire(ctx, inode))
- return 1;
- return 0;
-}
-
ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
@@ -616,7 +614,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
return result;
if (iocb->ki_flags & IOCB_DIRECT)
- return nfs_file_direct_write(iocb, from);
+ return nfs_file_direct_write(iocb, from, false);
dprintk("NFS: write(%pD2, %zu@%Ld)\n",
file, iov_iter_count(from), (long long) iocb->ki_pos);
@@ -629,7 +627,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
if (iocb->ki_flags & IOCB_APPEND || iocb->ki_pos > i_size_read(inode)) {
result = nfs_revalidate_file_size(inode, file);
if (result)
- goto out;
+ return result;
}
nfs_clear_invalid_mapping(file->f_mapping);
@@ -639,7 +637,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
result = generic_write_checks(iocb, from);
if (result > 0) {
current->backing_dev_info = inode_to_bdi(inode);
- result = generic_perform_write(file, from, iocb->ki_pos);
+ result = generic_perform_write(iocb, from);
current->backing_dev_info = NULL;
}
nfs_end_io_write(inode);
@@ -648,6 +646,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
written = result;
iocb->ki_pos += written;
+ nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
if (mntflags & NFS_MOUNT_WRITE_EAGER) {
result = filemap_fdatawrite_range(file->f_mapping,
@@ -657,25 +656,28 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
goto out;
}
if (mntflags & NFS_MOUNT_WRITE_WAIT) {
- result = filemap_fdatawait_range(file->f_mapping,
- iocb->ki_pos - written,
- iocb->ki_pos - 1);
- if (result < 0)
- goto out;
+ filemap_fdatawait_range(file->f_mapping,
+ iocb->ki_pos - written,
+ iocb->ki_pos - 1);
}
result = generic_write_sync(iocb, written);
if (result < 0)
- goto out;
+ return result;
+out:
/* Return error values */
error = filemap_check_wb_err(file->f_mapping, since);
- if (nfs_need_check_write(file, inode, error)) {
- int err = nfs_wb_all(inode);
- if (err < 0)
- result = err;
+ switch (error) {
+ default:
+ break;
+ case -EDQUOT:
+ case -EFBIG:
+ case -ENOSPC:
+ nfs_wb_all(inode);
+ error = file_check_and_advance_wb_err(file);
+ if (error < 0)
+ result = error;
}
- nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
-out:
return result;
out_swapfile:
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 9c96e3e5ed35..ad34a33b0737 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -181,6 +181,8 @@ static int filelayout_async_handle_error(struct rpc_task *task,
case -EIO:
case -ETIMEDOUT:
case -EPIPE:
+ case -EPROTO:
+ case -ENODEV:
dprintk("%s DS connection error %d\n", __func__,
task->tk_status);
nfs4_mark_deviceid_unavailable(devid);
@@ -839,7 +841,12 @@ fl_pnfs_update_layout(struct inode *ino,
lseg = pnfs_update_layout(ino, ctx, pos, count, iomode, strict_iomode,
gfp_flags);
- if (IS_ERR_OR_NULL(lseg))
+ if (IS_ERR(lseg)) {
+ /* Fall back to MDS on recoverable errors */
+ if (!nfs_error_is_fatal_on_server(PTR_ERR(lseg)))
+ lseg = NULL;
+ goto out;
+ } else if (!lseg)
goto out;
lo = NFS_I(ino)->layout;
@@ -1075,7 +1082,7 @@ filelayout_setup_ds_info(struct pnfs_ds_commit_info *fl_cinfo,
unsigned int size = (fl->stripe_type == STRIPE_SPARSE) ?
fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
- new = pnfs_alloc_commit_array(size, GFP_NOIO);
+ new = pnfs_alloc_commit_array(size, nfs_io_gfp_mask());
if (new) {
spin_lock(&inode->i_lock);
array = pnfs_add_commit_array(fl_cinfo, new, lseg);
diff --git a/fs/nfs/filelayout/filelayout.h b/fs/nfs/filelayout/filelayout.h
index 79323b5dab0c..aed0748fd6ec 100644
--- a/fs/nfs/filelayout/filelayout.h
+++ b/fs/nfs/filelayout/filelayout.h
@@ -51,7 +51,7 @@ struct nfs4_file_layout_dsaddr {
u32 stripe_count;
u8 *stripe_indices;
u32 ds_num;
- struct nfs4_pnfs_ds *ds_list[1];
+ struct nfs4_pnfs_ds *ds_list[];
};
struct nfs4_filelayout_segment {
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index 86c3f7e69ec4..acf4b88889dc 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -136,9 +136,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
goto out_err_free_stripe_indices;
}
- dsaddr = kzalloc(sizeof(*dsaddr) +
- (sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
- gfp_flags);
+ dsaddr = kzalloc(struct_size(dsaddr, ds_list, num), gfp_flags);
if (!dsaddr)
goto out_err_free_stripe_indices;
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index a553d59afa8b..1ec79ccf89ad 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -30,14 +30,20 @@
#define FF_LAYOUT_POLL_RETRY_MAX (15*HZ)
#define FF_LAYOUTRETURN_MAXERR 20
+enum nfs4_ff_op_type {
+ NFS4_FF_OP_LAYOUTSTATS,
+ NFS4_FF_OP_LAYOUTRETURN,
+};
+
static unsigned short io_maxretrans;
static const struct pnfs_commit_ops ff_layout_commit_ops;
static void ff_layout_read_record_layoutstats_done(struct rpc_task *task,
struct nfs_pgio_header *hdr);
-static int ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo,
+static int
+ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo,
struct nfs42_layoutstat_devinfo *devinfo,
- int dev_limit);
+ int dev_limit, enum nfs4_ff_op_type type);
static void ff_layout_encode_ff_layoutupdate(struct xdr_stream *xdr,
const struct nfs42_layoutstat_devinfo *devinfo,
struct nfs4_ff_layout_mirror *mirror);
@@ -663,7 +669,7 @@ nfs4_ff_layout_stat_io_start_read(struct inode *inode,
spin_unlock(&mirror->lock);
if (report)
- pnfs_report_layoutstat(inode, GFP_KERNEL);
+ pnfs_report_layoutstat(inode, nfs_io_gfp_mask());
}
static void
@@ -694,7 +700,7 @@ nfs4_ff_layout_stat_io_start_write(struct inode *inode,
spin_unlock(&mirror->lock);
if (report)
- pnfs_report_layoutstat(inode, GFP_NOIO);
+ pnfs_report_layoutstat(inode, nfs_io_gfp_mask());
}
static void
@@ -806,13 +812,10 @@ ff_layout_pg_get_read(struct nfs_pageio_descriptor *pgio,
bool strict_iomode)
{
pnfs_put_lseg(pgio->pg_lseg);
- pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
- nfs_req_openctx(req),
- req_offset(req),
- req->wb_bytes,
- IOMODE_READ,
- strict_iomode,
- GFP_KERNEL);
+ pgio->pg_lseg =
+ pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req),
+ req_offset(req), req->wb_bytes, IOMODE_READ,
+ strict_iomode, nfs_io_gfp_mask());
if (IS_ERR(pgio->pg_lseg)) {
pgio->pg_error = PTR_ERR(pgio->pg_lseg);
pgio->pg_lseg = NULL;
@@ -894,13 +897,10 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
retry:
ff_layout_pg_check_layout(pgio, req);
if (!pgio->pg_lseg) {
- pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
- nfs_req_openctx(req),
- req_offset(req),
- req->wb_bytes,
- IOMODE_RW,
- false,
- GFP_NOFS);
+ pgio->pg_lseg =
+ pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req),
+ req_offset(req), req->wb_bytes,
+ IOMODE_RW, false, nfs_io_gfp_mask());
if (IS_ERR(pgio->pg_lseg)) {
pgio->pg_error = PTR_ERR(pgio->pg_lseg);
pgio->pg_lseg = NULL;
@@ -953,13 +953,10 @@ ff_layout_pg_get_mirror_count_write(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req)
{
if (!pgio->pg_lseg) {
- pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
- nfs_req_openctx(req),
- req_offset(req),
- req->wb_bytes,
- IOMODE_RW,
- false,
- GFP_NOFS);
+ pgio->pg_lseg =
+ pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req),
+ req_offset(req), req->wb_bytes,
+ IOMODE_RW, false, nfs_io_gfp_mask());
if (IS_ERR(pgio->pg_lseg)) {
pgio->pg_error = PTR_ERR(pgio->pg_lseg);
pgio->pg_lseg = NULL;
@@ -1140,6 +1137,8 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
case -EIO:
case -ETIMEDOUT:
case -EPIPE:
+ case -EPROTO:
+ case -ENODEV:
dprintk("%s DS connection error %d\n", __func__,
task->tk_status);
nfs4_delete_deviceid(devid->ld, devid->nfs_client,
@@ -1245,6 +1244,8 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
case -ENOBUFS:
case -EPIPE:
case -EPERM:
+ case -EPROTO:
+ case -ENODEV:
*op_status = status = NFS4ERR_NXIO;
break;
case -EACCES:
@@ -1258,7 +1259,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
mirror = FF_LAYOUT_COMP(lseg, idx);
err = ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
mirror, offset, length, status, opnum,
- GFP_NOIO);
+ nfs_io_gfp_mask());
switch (status) {
case NFS4ERR_DELAY:
@@ -1378,6 +1379,11 @@ static int ff_layout_read_prepare_common(struct rpc_task *task,
return -EIO;
}
+ if (!pnfs_is_valid_lseg(hdr->lseg)) {
+ rpc_exit(task, -EAGAIN);
+ return -EAGAIN;
+ }
+
ff_layout_read_record_layoutstats_start(task, hdr);
return 0;
}
@@ -1558,6 +1564,11 @@ static int ff_layout_write_prepare_common(struct rpc_task *task,
return -EIO;
}
+ if (!pnfs_is_valid_lseg(hdr->lseg)) {
+ rpc_exit(task, -EAGAIN);
+ return -EAGAIN;
+ }
+
ff_layout_write_record_layoutstats_start(task, hdr);
return 0;
}
@@ -1650,15 +1661,23 @@ static void ff_layout_commit_record_layoutstats_done(struct rpc_task *task,
set_bit(NFS_LSEG_LAYOUTRETURN, &cdata->lseg->pls_flags);
}
-static void ff_layout_commit_prepare_common(struct rpc_task *task,
- struct nfs_commit_data *cdata)
+static int ff_layout_commit_prepare_common(struct rpc_task *task,
+ struct nfs_commit_data *cdata)
{
+ if (!pnfs_is_valid_lseg(cdata->lseg)) {
+ rpc_exit(task, -EAGAIN);
+ return -EAGAIN;
+ }
+
ff_layout_commit_record_layoutstats_start(task, cdata);
+ return 0;
}
static void ff_layout_commit_prepare_v3(struct rpc_task *task, void *data)
{
- ff_layout_commit_prepare_common(task, data);
+ if (ff_layout_commit_prepare_common(task, data))
+ return;
+
rpc_call_start(task);
}
@@ -1954,6 +1973,65 @@ ff_layout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
ff_layout_initiate_commit);
}
+static bool ff_layout_match_rw(const struct rpc_task *task,
+ const struct nfs_pgio_header *hdr,
+ const struct pnfs_layout_segment *lseg)
+{
+ return hdr->lseg == lseg;
+}
+
+static bool ff_layout_match_commit(const struct rpc_task *task,
+ const struct nfs_commit_data *cdata,
+ const struct pnfs_layout_segment *lseg)
+{
+ return cdata->lseg == lseg;
+}
+
+static bool ff_layout_match_io(const struct rpc_task *task, const void *data)
+{
+ const struct rpc_call_ops *ops = task->tk_ops;
+
+ if (ops == &ff_layout_read_call_ops_v3 ||
+ ops == &ff_layout_read_call_ops_v4 ||
+ ops == &ff_layout_write_call_ops_v3 ||
+ ops == &ff_layout_write_call_ops_v4)
+ return ff_layout_match_rw(task, task->tk_calldata, data);
+ if (ops == &ff_layout_commit_call_ops_v3 ||
+ ops == &ff_layout_commit_call_ops_v4)
+ return ff_layout_match_commit(task, task->tk_calldata, data);
+ return false;
+}
+
+static void ff_layout_cancel_io(struct pnfs_layout_segment *lseg)
+{
+ struct nfs4_ff_layout_segment *flseg = FF_LAYOUT_LSEG(lseg);
+ struct nfs4_ff_layout_mirror *mirror;
+ struct nfs4_ff_layout_ds *mirror_ds;
+ struct nfs4_pnfs_ds *ds;
+ struct nfs_client *ds_clp;
+ struct rpc_clnt *clnt;
+ u32 idx;
+
+ for (idx = 0; idx < flseg->mirror_array_cnt; idx++) {
+ mirror = flseg->mirror_array[idx];
+ mirror_ds = mirror->mirror_ds;
+ if (!mirror_ds)
+ continue;
+ ds = mirror->mirror_ds->ds;
+ if (!ds)
+ continue;
+ ds_clp = ds->ds_clp;
+ if (!ds_clp)
+ continue;
+ clnt = ds_clp->cl_rpcclient;
+ if (!clnt)
+ continue;
+ if (!rpc_cancel_tasks(clnt, -EAGAIN, ff_layout_match_io, lseg))
+ continue;
+ rpc_clnt_disconnect(clnt);
+ }
+}
+
static struct pnfs_ds_commit_info *
ff_layout_get_ds_info(struct inode *inode)
{
@@ -1973,7 +2051,8 @@ ff_layout_setup_ds_info(struct pnfs_ds_commit_info *fl_cinfo,
struct inode *inode = lseg->pls_layout->plh_inode;
struct pnfs_commit_array *array, *new;
- new = pnfs_alloc_commit_array(flseg->mirror_array_cnt, GFP_NOIO);
+ new = pnfs_alloc_commit_array(flseg->mirror_array_cnt,
+ nfs_io_gfp_mask());
if (new) {
spin_lock(&inode->i_lock);
array = pnfs_add_commit_array(fl_cinfo, new, lseg);
@@ -2152,10 +2231,10 @@ ff_layout_prepare_layoutreturn(struct nfs4_layoutreturn_args *args)
struct nfs4_flexfile_layoutreturn_args *ff_args;
struct nfs4_flexfile_layout *ff_layout = FF_LAYOUT_FROM_HDR(args->layout);
- ff_args = kmalloc(sizeof(*ff_args), GFP_KERNEL);
+ ff_args = kmalloc(sizeof(*ff_args), nfs_io_gfp_mask());
if (!ff_args)
goto out_nomem;
- ff_args->pages[0] = alloc_page(GFP_KERNEL);
+ ff_args->pages[0] = alloc_page(nfs_io_gfp_mask());
if (!ff_args->pages[0])
goto out_nomem_free;
@@ -2165,8 +2244,9 @@ ff_layout_prepare_layoutreturn(struct nfs4_layoutreturn_args *args)
FF_LAYOUTRETURN_MAXERR);
spin_lock(&args->inode->i_lock);
- ff_args->num_dev = ff_layout_mirror_prepare_stats(&ff_layout->generic_hdr,
- &ff_args->devinfo[0], ARRAY_SIZE(ff_args->devinfo));
+ ff_args->num_dev = ff_layout_mirror_prepare_stats(
+ &ff_layout->generic_hdr, &ff_args->devinfo[0],
+ ARRAY_SIZE(ff_args->devinfo), NFS4_FF_OP_LAYOUTRETURN);
spin_unlock(&args->inode->i_lock);
args->ld_private->ops = &layoutreturn_ops;
@@ -2192,8 +2272,8 @@ ff_layout_send_layouterror(struct pnfs_layout_segment *lseg)
if (list_empty(&head))
return;
- errors = kmalloc_array(NFS42_LAYOUTERROR_MAX,
- sizeof(*errors), GFP_NOFS);
+ errors = kmalloc_array(NFS42_LAYOUTERROR_MAX, sizeof(*errors),
+ nfs_io_gfp_mask());
if (errors != NULL) {
const struct nfs4_ff_layout_ds_err *pos;
size_t n = 0;
@@ -2400,7 +2480,7 @@ static const struct nfs4_xdr_opaque_ops layoutstat_ops = {
static int
ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo,
struct nfs42_layoutstat_devinfo *devinfo,
- int dev_limit)
+ int dev_limit, enum nfs4_ff_op_type type)
{
struct nfs4_flexfile_layout *ff_layout = FF_LAYOUT_FROM_HDR(lo);
struct nfs4_ff_layout_mirror *mirror;
@@ -2412,7 +2492,9 @@ ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo,
break;
if (IS_ERR_OR_NULL(mirror->mirror_ds))
continue;
- if (!test_and_clear_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags))
+ if (!test_and_clear_bit(NFS4_FF_MIRROR_STAT_AVAIL,
+ &mirror->flags) &&
+ type != NFS4_FF_OP_LAYOUTRETURN)
continue;
/* mirror refcount put in cleanup_layoutstats */
if (!refcount_inc_not_zero(&mirror->ref))
@@ -2444,14 +2526,17 @@ ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args)
const int dev_count = PNFS_LAYOUTSTATS_MAXDEV;
/* For now, send at most PNFS_LAYOUTSTATS_MAXDEV statistics */
- args->devinfo = kmalloc_array(dev_count, sizeof(*args->devinfo), GFP_NOIO);
+ args->devinfo = kmalloc_array(dev_count, sizeof(*args->devinfo),
+ nfs_io_gfp_mask());
if (!args->devinfo)
return -ENOMEM;
spin_lock(&args->inode->i_lock);
ff_layout = FF_LAYOUT_FROM_HDR(NFS_I(args->inode)->layout);
args->num_dev = ff_layout_mirror_prepare_stats(&ff_layout->generic_hdr,
- &args->devinfo[0], dev_count);
+ &args->devinfo[0],
+ dev_count,
+ NFS4_FF_OP_LAYOUTSTATS);
spin_unlock(&args->inode->i_lock);
if (!args->num_dev) {
kfree(args->devinfo);
@@ -2504,6 +2589,7 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = {
.prepare_layoutreturn = ff_layout_prepare_layoutreturn,
.sync = pnfs_nfs_generic_sync,
.prepare_layoutstats = ff_layout_prepare_layoutstats,
+ .cancel_io = ff_layout_cancel_io,
};
static int __init nfs4flexfilelayout_init(void)
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index bfa7202ca7be..e028f5a0ef5f 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -113,8 +113,10 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
goto out_err_drain_dsaddrs;
ds_versions[i].version = be32_to_cpup(p++);
ds_versions[i].minor_version = be32_to_cpup(p++);
- ds_versions[i].rsize = nfs_block_size(be32_to_cpup(p++), NULL);
- ds_versions[i].wsize = nfs_block_size(be32_to_cpup(p++), NULL);
+ ds_versions[i].rsize = nfs_io_size(be32_to_cpup(p++),
+ server->nfs_client->cl_proto);
+ ds_versions[i].wsize = nfs_io_size(be32_to_cpup(p++),
+ server->nfs_client->cl_proto);
ds_versions[i].tightly_coupled = be32_to_cpup(p);
if (ds_versions[i].rsize > NFS_MAX_FILE_IO_SIZE)
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index 0d444a90f513..09833ec102fc 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -10,6 +10,7 @@
* Split from fs/nfs/super.c by David Howells <dhowells@redhat.com>
*/
+#include <linux/compat.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/fs_context.h>
@@ -20,6 +21,8 @@
#include "nfs.h"
#include "internal.h"
+#include "nfstrace.h"
+
#define NFSDBG_FACILITY NFSDBG_MOUNT
#if IS_ENABLED(CONFIG_NFS_V3)
@@ -79,6 +82,7 @@ enum nfs_param {
Opt_source,
Opt_tcp,
Opt_timeo,
+ Opt_trunkdiscovery,
Opt_udp,
Opt_v,
Opt_vers,
@@ -179,6 +183,7 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = {
fsparam_string("source", Opt_source),
fsparam_flag ("tcp", Opt_tcp),
fsparam_u32 ("timeo", Opt_timeo),
+ fsparam_flag_no("trunkdiscovery", Opt_trunkdiscovery),
fsparam_flag ("udp", Opt_udp),
fsparam_flag ("v2", Opt_v),
fsparam_flag ("v3", Opt_v),
@@ -268,9 +273,9 @@ static const struct constant_table nfs_secflavor_tokens[] = {
* Address family must be initialized, and address must not be
* the ANY address for that family.
*/
-static int nfs_verify_server_address(struct sockaddr *addr)
+static int nfs_verify_server_address(struct sockaddr_storage *addr)
{
- switch (addr->sa_family) {
+ switch (addr->ss_family) {
case AF_INET: {
struct sockaddr_in *sa = (struct sockaddr_in *)addr;
return sa->sin_addr.s_addr != htonl(INADDR_ANY);
@@ -281,7 +286,6 @@ static int nfs_verify_server_address(struct sockaddr *addr)
}
}
- dfprintk(MOUNT, "NFS: Invalid IP address specified\n");
return 0;
}
@@ -375,7 +379,7 @@ static int nfs_parse_security_flavors(struct fs_context *fc,
char *string = param->string, *p;
int ret;
- dfprintk(MOUNT, "NFS: parsing %s=%s option\n", param->key, param->string);
+ trace_nfs_mount_assign(param->key, string);
while ((p = strsep(&string, ":")) != NULL) {
if (!*p)
@@ -477,11 +481,11 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
unsigned int len;
int ret, opt;
- dfprintk(MOUNT, "NFS: parsing nfs mount option '%s'\n", param->key);
+ trace_nfs_mount_option(param);
opt = fs_parse(fc, nfs_fs_parameters, param, &result);
if (opt < 0)
- return ctx->sloppy ? 1 : opt;
+ return (opt == -ENOPARAM && ctx->sloppy) ? 1 : opt;
if (fc->security)
ctx->has_sec_mnt_opts = 1;
@@ -514,7 +518,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
if (result.negated)
ctx->flags &= ~NFS_MOUNT_SOFTREVAL;
else
- ctx->flags &= NFS_MOUNT_SOFTREVAL;
+ ctx->flags |= NFS_MOUNT_SOFTREVAL;
break;
case Opt_posix:
if (result.negated)
@@ -528,6 +532,12 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
else
ctx->flags &= ~NFS_MOUNT_NOCTO;
break;
+ case Opt_trunkdiscovery:
+ if (result.negated)
+ ctx->flags &= ~NFS_MOUNT_TRUNK_DISCOVERY;
+ else
+ ctx->flags |= NFS_MOUNT_TRUNK_DISCOVERY;
+ break;
case Opt_ac:
if (result.negated)
ctx->flags |= NFS_MOUNT_NOAC;
@@ -674,6 +684,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
return ret;
break;
case Opt_vers:
+ trace_nfs_mount_assign(param->key, param->string);
ret = nfs_parse_version_string(fc, param->string);
if (ret < 0)
return ret;
@@ -685,6 +696,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
break;
case Opt_proto:
+ trace_nfs_mount_assign(param->key, param->string);
protofamily = AF_INET;
switch (lookup_constant(nfs_xprt_protocol_tokens, param->string, -1)) {
case Opt_xprt_udp6:
@@ -720,6 +732,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
break;
case Opt_mountproto:
+ trace_nfs_mount_assign(param->key, param->string);
mountfamily = AF_INET;
switch (lookup_constant(nfs_xprt_protocol_tokens, param->string, -1)) {
case Opt_xprt_udp6:
@@ -742,6 +755,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
break;
case Opt_addr:
+ trace_nfs_mount_assign(param->key, param->string);
len = rpc_pton(fc->net_ns, param->string, param->size,
&ctx->nfs_server.address,
sizeof(ctx->nfs_server._address));
@@ -750,16 +764,19 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
ctx->nfs_server.addrlen = len;
break;
case Opt_clientaddr:
+ trace_nfs_mount_assign(param->key, param->string);
kfree(ctx->client_address);
ctx->client_address = param->string;
param->string = NULL;
break;
case Opt_mounthost:
+ trace_nfs_mount_assign(param->key, param->string);
kfree(ctx->mount_server.hostname);
ctx->mount_server.hostname = param->string;
param->string = NULL;
break;
case Opt_mountaddr:
+ trace_nfs_mount_assign(param->key, param->string);
len = rpc_pton(fc->net_ns, param->string, param->size,
&ctx->mount_server.address,
sizeof(ctx->mount_server._address));
@@ -837,7 +854,6 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
*/
case Opt_sloppy:
ctx->sloppy = true;
- dfprintk(MOUNT, "NFS: relaxing parsing rules\n");
break;
}
@@ -870,10 +886,8 @@ static int nfs_parse_source(struct fs_context *fc,
size_t len;
const char *end;
- if (unlikely(!dev_name || !*dev_name)) {
- dfprintk(MOUNT, "NFS: device name not specified\n");
+ if (unlikely(!dev_name || !*dev_name))
return -EINVAL;
- }
/* Is the host name protected with square brackets? */
if (*dev_name == '[') {
@@ -913,7 +927,7 @@ static int nfs_parse_source(struct fs_context *fc,
if (!ctx->nfs_server.export_path)
goto out_nomem;
- dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", ctx->nfs_server.export_path);
+ trace_nfs_mount_path(ctx->nfs_server.export_path);
return 0;
out_bad_devname:
@@ -955,7 +969,7 @@ static int nfs23_parse_monolithic(struct fs_context *fc,
{
struct nfs_fs_context *ctx = nfs_fc2context(fc);
struct nfs_fh *mntfh = ctx->mntfh;
- struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address;
+ struct sockaddr_storage *sap = &ctx->nfs_server._address;
int extra_flags = NFS_MOUNT_LEGACY_INTERFACE;
int ret;
@@ -1030,7 +1044,7 @@ static int nfs23_parse_monolithic(struct fs_context *fc,
memcpy(sap, &data->addr, sizeof(data->addr));
ctx->nfs_server.addrlen = sizeof(data->addr);
ctx->nfs_server.port = ntohs(data->addr.sin_port);
- if (sap->sa_family != AF_INET ||
+ if (sap->ss_family != AF_INET ||
!nfs_verify_server_address(sap))
goto out_no_address;
@@ -1107,7 +1121,6 @@ out_no_sec:
return nfs_invalf(fc, "NFS: nfs_mount_data version supports only AUTH_SYS");
out_nomem:
- dfprintk(MOUNT, "NFS: not enough memory to handle mount options");
return -ENOMEM;
out_no_address:
@@ -1187,7 +1200,7 @@ static int nfs4_parse_monolithic(struct fs_context *fc,
struct nfs4_mount_data *data)
{
struct nfs_fs_context *ctx = nfs_fc2context(fc);
- struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address;
+ struct sockaddr_storage *sap = &ctx->nfs_server._address;
int ret;
char *c;
@@ -1239,7 +1252,7 @@ static int nfs4_parse_monolithic(struct fs_context *fc,
if (IS_ERR(c))
return PTR_ERR(c);
ctx->nfs_server.export_path = c;
- dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", c);
+ trace_nfs_mount_path(c);
c = strndup_user(data->client_addr.data, 16);
if (IS_ERR(c))
@@ -1301,7 +1314,7 @@ static int nfs_fs_context_validate(struct fs_context *fc)
{
struct nfs_fs_context *ctx = nfs_fc2context(fc);
struct nfs_subversion *nfs_mod;
- struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address;
+ struct sockaddr_storage *sap = &ctx->nfs_server._address;
int max_namelen = PAGE_SIZE;
int max_pathlen = NFS_MAXPATHLEN;
int port = 0;
@@ -1527,7 +1540,7 @@ static int nfs_init_fs_context(struct fs_context *fc)
ctx->version = nfss->nfs_client->rpc_ops->version;
ctx->minorversion = nfss->nfs_client->cl_minorversion;
- memcpy(&ctx->nfs_server.address, &nfss->nfs_client->cl_addr,
+ memcpy(&ctx->nfs_server._address, &nfss->nfs_client->cl_addr,
ctx->nfs_server.addrlen);
if (fc->net_ns != net) {
diff --git a/fs/nfs/fscache-index.c b/fs/nfs/fscache-index.c
deleted file mode 100644
index 573b1da9342c..000000000000
--- a/fs/nfs/fscache-index.c
+++ /dev/null
@@ -1,140 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/* NFS FS-Cache index structure definition
- *
- * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/nfs_fs.h>
-#include <linux/nfs_fs_sb.h>
-#include <linux/in6.h>
-#include <linux/iversion.h>
-
-#include "internal.h"
-#include "fscache.h"
-
-#define NFSDBG_FACILITY NFSDBG_FSCACHE
-
-/*
- * Define the NFS filesystem for FS-Cache. Upon registration FS-Cache sticks
- * the cookie for the top-level index object for NFS into here. The top-level
- * index can then have other cache objects inserted into it.
- */
-struct fscache_netfs nfs_fscache_netfs = {
- .name = "nfs",
- .version = 0,
-};
-
-/*
- * Register NFS for caching
- */
-int nfs_fscache_register(void)
-{
- return fscache_register_netfs(&nfs_fscache_netfs);
-}
-
-/*
- * Unregister NFS for caching
- */
-void nfs_fscache_unregister(void)
-{
- fscache_unregister_netfs(&nfs_fscache_netfs);
-}
-
-/*
- * Define the server object for FS-Cache. This is used to describe a server
- * object to fscache_acquire_cookie(). It is keyed by the NFS protocol and
- * server address parameters.
- */
-const struct fscache_cookie_def nfs_fscache_server_index_def = {
- .name = "NFS.server",
- .type = FSCACHE_COOKIE_TYPE_INDEX,
-};
-
-/*
- * Define the superblock object for FS-Cache. This is used to describe a
- * superblock object to fscache_acquire_cookie(). It is keyed by all the NFS
- * parameters that might cause a separate superblock.
- */
-const struct fscache_cookie_def nfs_fscache_super_index_def = {
- .name = "NFS.super",
- .type = FSCACHE_COOKIE_TYPE_INDEX,
-};
-
-/*
- * Consult the netfs about the state of an object
- * - This function can be absent if the index carries no state data
- * - The netfs data from the cookie being used as the target is
- * presented, as is the auxiliary data
- */
-static
-enum fscache_checkaux nfs_fscache_inode_check_aux(void *cookie_netfs_data,
- const void *data,
- uint16_t datalen,
- loff_t object_size)
-{
- struct nfs_fscache_inode_auxdata auxdata;
- struct nfs_inode *nfsi = cookie_netfs_data;
-
- if (datalen != sizeof(auxdata))
- return FSCACHE_CHECKAUX_OBSOLETE;
-
- memset(&auxdata, 0, sizeof(auxdata));
- auxdata.mtime_sec = nfsi->vfs_inode.i_mtime.tv_sec;
- auxdata.mtime_nsec = nfsi->vfs_inode.i_mtime.tv_nsec;
- auxdata.ctime_sec = nfsi->vfs_inode.i_ctime.tv_sec;
- auxdata.ctime_nsec = nfsi->vfs_inode.i_ctime.tv_nsec;
-
- if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4)
- auxdata.change_attr = inode_peek_iversion_raw(&nfsi->vfs_inode);
-
- if (memcmp(data, &auxdata, datalen) != 0)
- return FSCACHE_CHECKAUX_OBSOLETE;
-
- return FSCACHE_CHECKAUX_OKAY;
-}
-
-/*
- * Get an extra reference on a read context.
- * - This function can be absent if the completion function doesn't require a
- * context.
- * - The read context is passed back to NFS in the event that a data read on the
- * cache fails with EIO - in which case the server must be contacted to
- * retrieve the data, which requires the read context for security.
- */
-static void nfs_fh_get_context(void *cookie_netfs_data, void *context)
-{
- get_nfs_open_context(context);
-}
-
-/*
- * Release an extra reference on a read context.
- * - This function can be absent if the completion function doesn't require a
- * context.
- */
-static void nfs_fh_put_context(void *cookie_netfs_data, void *context)
-{
- if (context)
- put_nfs_open_context(context);
-}
-
-/*
- * Define the inode object for FS-Cache. This is used to describe an inode
- * object to fscache_acquire_cookie(). It is keyed by the NFS file handle for
- * an inode.
- *
- * Coherency is managed by comparing the copies of i_size, i_mtime and i_ctime
- * held in the cache auxiliary data for the data storage object with those in
- * the inode struct in memory.
- */
-const struct fscache_cookie_def nfs_fscache_inode_object_def = {
- .name = "NFS.fh",
- .type = FSCACHE_COOKIE_TYPE_DATAFILE,
- .check_aux = nfs_fscache_inode_check_aux,
- .get_context = nfs_fh_get_context,
- .put_context = nfs_fh_put_context,
-};
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index d743629e05e1..e861d7bae305 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -19,27 +19,20 @@
#include "internal.h"
#include "iostat.h"
#include "fscache.h"
+#include "nfstrace.h"
-#define NFSDBG_FACILITY NFSDBG_FSCACHE
+#define NFS_MAX_KEY_LEN 1000
-static struct rb_root nfs_fscache_keys = RB_ROOT;
-static DEFINE_SPINLOCK(nfs_fscache_keys_lock);
-
-/*
- * Layout of the key for an NFS server cache object.
- */
-struct nfs_server_key {
- struct {
- uint16_t nfsversion; /* NFS protocol version */
- uint32_t minorversion; /* NFSv4 minor version */
- uint16_t family; /* address family */
- __be16 port; /* IP port */
- } hdr;
- union {
- struct in_addr ipv4_addr; /* IPv4 address */
- struct in6_addr ipv6_addr; /* IPv6 address */
- };
-} __packed;
+static bool nfs_append_int(char *key, int *_len, unsigned long long x)
+{
+ if (*_len > NFS_MAX_KEY_LEN)
+ return false;
+ if (x == 0)
+ key[(*_len)++] = ',';
+ else
+ *_len += sprintf(key + *_len, ",%llx", x);
+ return true;
+}
/*
* Get the per-client index cookie for an NFS client if the appropriate mount
@@ -47,160 +40,106 @@ struct nfs_server_key {
* - We always try and get an index cookie for the client, but get filehandle
* cookies on a per-superblock basis, depending on the mount flags
*/
-void nfs_fscache_get_client_cookie(struct nfs_client *clp)
+static bool nfs_fscache_get_client_key(struct nfs_client *clp,
+ char *key, int *_len)
{
const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) &clp->cl_addr;
const struct sockaddr_in *sin = (struct sockaddr_in *) &clp->cl_addr;
- struct nfs_server_key key;
- uint16_t len = sizeof(key.hdr);
- memset(&key, 0, sizeof(key));
- key.hdr.nfsversion = clp->rpc_ops->version;
- key.hdr.minorversion = clp->cl_minorversion;
- key.hdr.family = clp->cl_addr.ss_family;
+ *_len += snprintf(key + *_len, NFS_MAX_KEY_LEN - *_len,
+ ",%u.%u,%x",
+ clp->rpc_ops->version,
+ clp->cl_minorversion,
+ clp->cl_addr.ss_family);
switch (clp->cl_addr.ss_family) {
case AF_INET:
- key.hdr.port = sin->sin_port;
- key.ipv4_addr = sin->sin_addr;
- len += sizeof(key.ipv4_addr);
- break;
+ if (!nfs_append_int(key, _len, sin->sin_port) ||
+ !nfs_append_int(key, _len, sin->sin_addr.s_addr))
+ return false;
+ return true;
case AF_INET6:
- key.hdr.port = sin6->sin6_port;
- key.ipv6_addr = sin6->sin6_addr;
- len += sizeof(key.ipv6_addr);
- break;
+ if (!nfs_append_int(key, _len, sin6->sin6_port) ||
+ !nfs_append_int(key, _len, sin6->sin6_addr.s6_addr32[0]) ||
+ !nfs_append_int(key, _len, sin6->sin6_addr.s6_addr32[1]) ||
+ !nfs_append_int(key, _len, sin6->sin6_addr.s6_addr32[2]) ||
+ !nfs_append_int(key, _len, sin6->sin6_addr.s6_addr32[3]))
+ return false;
+ return true;
default:
printk(KERN_WARNING "NFS: Unknown network family '%d'\n",
clp->cl_addr.ss_family);
- clp->fscache = NULL;
- return;
+ return false;
}
-
- /* create a cache index for looking up filehandles */
- clp->fscache = fscache_acquire_cookie(nfs_fscache_netfs.primary_index,
- &nfs_fscache_server_index_def,
- &key, len,
- NULL, 0,
- clp, 0, true);
- dfprintk(FSCACHE, "NFS: get client cookie (0x%p/0x%p)\n",
- clp, clp->fscache);
-}
-
-/*
- * Dispose of a per-client cookie
- */
-void nfs_fscache_release_client_cookie(struct nfs_client *clp)
-{
- dfprintk(FSCACHE, "NFS: releasing client cookie (0x%p/0x%p)\n",
- clp, clp->fscache);
-
- fscache_relinquish_cookie(clp->fscache, NULL, false);
- clp->fscache = NULL;
}
/*
- * Get the cache cookie for an NFS superblock. We have to handle
- * uniquification here because the cache doesn't do it for us.
+ * Get the cache cookie for an NFS superblock.
*
* The default uniquifier is just an empty string, but it may be overridden
* either by the 'fsc=xxx' option to mount, or by inheriting it from the parent
* superblock across an automount point of some nature.
*/
-void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, int ulen)
+int nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, int ulen)
{
- struct nfs_fscache_key *key, *xkey;
+ struct fscache_volume *vcookie;
struct nfs_server *nfss = NFS_SB(sb);
- struct rb_node **p, *parent;
- int diff;
+ unsigned int len = 3;
+ char *key;
- nfss->fscache_key = NULL;
- nfss->fscache = NULL;
- if (!uniq) {
- uniq = "";
- ulen = 1;
+ if (uniq) {
+ nfss->fscache_uniq = kmemdup_nul(uniq, ulen, GFP_KERNEL);
+ if (!nfss->fscache_uniq)
+ return -ENOMEM;
}
- key = kzalloc(sizeof(*key) + ulen, GFP_KERNEL);
+ key = kmalloc(NFS_MAX_KEY_LEN + 24, GFP_KERNEL);
if (!key)
- return;
-
- key->nfs_client = nfss->nfs_client;
- key->key.super.s_flags = sb->s_flags & NFS_SB_MASK;
- key->key.nfs_server.flags = nfss->flags;
- key->key.nfs_server.rsize = nfss->rsize;
- key->key.nfs_server.wsize = nfss->wsize;
- key->key.nfs_server.acregmin = nfss->acregmin;
- key->key.nfs_server.acregmax = nfss->acregmax;
- key->key.nfs_server.acdirmin = nfss->acdirmin;
- key->key.nfs_server.acdirmax = nfss->acdirmax;
- key->key.nfs_server.fsid = nfss->fsid;
- key->key.rpc_auth.au_flavor = nfss->client->cl_auth->au_flavor;
-
- key->key.uniq_len = ulen;
- memcpy(key->key.uniquifier, uniq, ulen);
-
- spin_lock(&nfs_fscache_keys_lock);
- p = &nfs_fscache_keys.rb_node;
- parent = NULL;
- while (*p) {
- parent = *p;
- xkey = rb_entry(parent, struct nfs_fscache_key, node);
-
- if (key->nfs_client < xkey->nfs_client)
- goto go_left;
- if (key->nfs_client > xkey->nfs_client)
- goto go_right;
-
- diff = memcmp(&key->key, &xkey->key, sizeof(key->key));
- if (diff < 0)
- goto go_left;
- if (diff > 0)
- goto go_right;
-
- if (key->key.uniq_len == 0)
- goto non_unique;
- diff = memcmp(key->key.uniquifier,
- xkey->key.uniquifier,
- key->key.uniq_len);
- if (diff < 0)
- goto go_left;
- if (diff > 0)
- goto go_right;
- goto non_unique;
-
- go_left:
- p = &(*p)->rb_left;
- continue;
- go_right:
- p = &(*p)->rb_right;
+ return -ENOMEM;
+
+ memcpy(key, "nfs", 3);
+ if (!nfs_fscache_get_client_key(nfss->nfs_client, key, &len) ||
+ !nfs_append_int(key, &len, nfss->fsid.major) ||
+ !nfs_append_int(key, &len, nfss->fsid.minor) ||
+ !nfs_append_int(key, &len, sb->s_flags & NFS_SB_MASK) ||
+ !nfs_append_int(key, &len, nfss->flags) ||
+ !nfs_append_int(key, &len, nfss->rsize) ||
+ !nfs_append_int(key, &len, nfss->wsize) ||
+ !nfs_append_int(key, &len, nfss->acregmin) ||
+ !nfs_append_int(key, &len, nfss->acregmax) ||
+ !nfs_append_int(key, &len, nfss->acdirmin) ||
+ !nfs_append_int(key, &len, nfss->acdirmax) ||
+ !nfs_append_int(key, &len, nfss->client->cl_auth->au_flavor))
+ goto out;
+
+ if (ulen > 0) {
+ if (ulen > NFS_MAX_KEY_LEN - len)
+ goto out;
+ key[len++] = ',';
+ memcpy(key + len, uniq, ulen);
+ len += ulen;
}
-
- rb_link_node(&key->node, parent, p);
- rb_insert_color(&key->node, &nfs_fscache_keys);
- spin_unlock(&nfs_fscache_keys_lock);
- nfss->fscache_key = key;
+ key[len] = 0;
/* create a cache index for looking up filehandles */
- nfss->fscache = fscache_acquire_cookie(nfss->nfs_client->fscache,
- &nfs_fscache_super_index_def,
- &key->key,
- sizeof(key->key) + ulen,
- NULL, 0,
- nfss, 0, true);
- dfprintk(FSCACHE, "NFS: get superblock cookie (0x%p/0x%p)\n",
- nfss, nfss->fscache);
- return;
-
-non_unique:
- spin_unlock(&nfs_fscache_keys_lock);
+ vcookie = fscache_acquire_volume(key,
+ NULL, /* preferred_cache */
+ NULL, 0 /* coherency_data */);
+ if (IS_ERR(vcookie)) {
+ if (vcookie != ERR_PTR(-EBUSY)) {
+ kfree(key);
+ return PTR_ERR(vcookie);
+ }
+ pr_err("NFS: Cache volume key already in use (%s)\n", key);
+ vcookie = NULL;
+ }
+ nfss->fscache = vcookie;
+
+out:
kfree(key);
- nfss->fscache_key = NULL;
- nfss->fscache = NULL;
- printk(KERN_WARNING "NFS:"
- " Cache request denied due to non-unique superblock keys\n");
+ return 0;
}
/*
@@ -210,32 +149,9 @@ void nfs_fscache_release_super_cookie(struct super_block *sb)
{
struct nfs_server *nfss = NFS_SB(sb);
- dfprintk(FSCACHE, "NFS: releasing superblock cookie (0x%p/0x%p)\n",
- nfss, nfss->fscache);
-
- fscache_relinquish_cookie(nfss->fscache, NULL, false);
+ fscache_relinquish_volume(nfss->fscache, NULL, false);
nfss->fscache = NULL;
-
- if (nfss->fscache_key) {
- spin_lock(&nfs_fscache_keys_lock);
- rb_erase(&nfss->fscache_key->node, &nfs_fscache_keys);
- spin_unlock(&nfs_fscache_keys_lock);
- kfree(nfss->fscache_key);
- nfss->fscache_key = NULL;
- }
-}
-
-static void nfs_fscache_update_auxdata(struct nfs_fscache_inode_auxdata *auxdata,
- struct nfs_inode *nfsi)
-{
- memset(auxdata, 0, sizeof(*auxdata));
- auxdata->mtime_sec = nfsi->vfs_inode.i_mtime.tv_sec;
- auxdata->mtime_nsec = nfsi->vfs_inode.i_mtime.tv_nsec;
- auxdata->ctime_sec = nfsi->vfs_inode.i_ctime.tv_sec;
- auxdata->ctime_nsec = nfsi->vfs_inode.i_ctime.tv_nsec;
-
- if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4)
- auxdata->change_attr = inode_peek_iversion_raw(&nfsi->vfs_inode);
+ kfree(nfss->fscache_uniq);
}
/*
@@ -251,13 +167,15 @@ void nfs_fscache_init_inode(struct inode *inode)
if (!(nfss->fscache && S_ISREG(inode->i_mode)))
return;
- nfs_fscache_update_auxdata(&auxdata, nfsi);
+ nfs_fscache_update_auxdata(&auxdata, inode);
nfsi->fscache = fscache_acquire_cookie(NFS_SB(inode->i_sb)->fscache,
- &nfs_fscache_inode_object_def,
- nfsi->fh.data, nfsi->fh.size,
- &auxdata, sizeof(auxdata),
- nfsi, nfsi->vfs_inode.i_size, false);
+ 0,
+ nfsi->fh.data, /* index_key */
+ nfsi->fh.size,
+ &auxdata, /* aux_data */
+ sizeof(auxdata),
+ i_size_read(inode));
}
/*
@@ -265,24 +183,13 @@ void nfs_fscache_init_inode(struct inode *inode)
*/
void nfs_fscache_clear_inode(struct inode *inode)
{
- struct nfs_fscache_inode_auxdata auxdata;
struct nfs_inode *nfsi = NFS_I(inode);
struct fscache_cookie *cookie = nfs_i_fscache(inode);
- dfprintk(FSCACHE, "NFS: clear cookie (0x%p/0x%p)\n", nfsi, cookie);
-
- nfs_fscache_update_auxdata(&auxdata, nfsi);
- fscache_relinquish_cookie(cookie, &auxdata, false);
+ fscache_relinquish_cookie(cookie, false);
nfsi->fscache = NULL;
}
-static bool nfs_fscache_can_enable(void *data)
-{
- struct inode *inode = data;
-
- return !inode_is_open_for_write(inode);
-}
-
/*
* Enable or disable caching for a file that is being opened as appropriate.
* The cookie is allocated when the inode is initialised, but is not enabled at
@@ -305,216 +212,137 @@ static bool nfs_fscache_can_enable(void *data)
void nfs_fscache_open_file(struct inode *inode, struct file *filp)
{
struct nfs_fscache_inode_auxdata auxdata;
- struct nfs_inode *nfsi = NFS_I(inode);
struct fscache_cookie *cookie = nfs_i_fscache(inode);
+ bool open_for_write = inode_is_open_for_write(inode);
if (!fscache_cookie_valid(cookie))
return;
- nfs_fscache_update_auxdata(&auxdata, nfsi);
-
- if (inode_is_open_for_write(inode)) {
- dfprintk(FSCACHE, "NFS: nfsi 0x%p disabling cache\n", nfsi);
- clear_bit(NFS_INO_FSCACHE, &nfsi->flags);
- fscache_disable_cookie(cookie, &auxdata, true);
- fscache_uncache_all_inode_pages(cookie, inode);
- } else {
- dfprintk(FSCACHE, "NFS: nfsi 0x%p enabling cache\n", nfsi);
- fscache_enable_cookie(cookie, &auxdata, nfsi->vfs_inode.i_size,
- nfs_fscache_can_enable, inode);
- if (fscache_cookie_enabled(cookie))
- set_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags);
+ fscache_use_cookie(cookie, open_for_write);
+ if (open_for_write) {
+ nfs_fscache_update_auxdata(&auxdata, inode);
+ fscache_invalidate(cookie, &auxdata, i_size_read(inode),
+ FSCACHE_INVAL_DIO_WRITE);
}
}
EXPORT_SYMBOL_GPL(nfs_fscache_open_file);
-/*
- * Release the caching state associated with a page, if the page isn't busy
- * interacting with the cache.
- * - Returns true (can release page) or false (page busy).
- */
-int nfs_fscache_release_page(struct page *page, gfp_t gfp)
+void nfs_fscache_release_file(struct inode *inode, struct file *filp)
{
- if (PageFsCache(page)) {
- struct fscache_cookie *cookie = nfs_i_fscache(page->mapping->host);
-
- BUG_ON(!cookie);
- dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n",
- cookie, page, NFS_I(page->mapping->host));
-
- if (!fscache_maybe_release_page(cookie, page, gfp))
- return 0;
-
- nfs_inc_fscache_stats(page->mapping->host,
- NFSIOS_FSCACHE_PAGES_UNCACHED);
- }
+ struct nfs_fscache_inode_auxdata auxdata;
+ struct fscache_cookie *cookie = nfs_i_fscache(inode);
+ loff_t i_size = i_size_read(inode);
- return 1;
+ nfs_fscache_update_auxdata(&auxdata, inode);
+ fscache_unuse_cookie(cookie, &auxdata, &i_size);
}
/*
- * Release the caching state associated with a page if undergoing complete page
- * invalidation.
+ * Fallback page reading interface.
*/
-void __nfs_fscache_invalidate_page(struct page *page, struct inode *inode)
+static int fscache_fallback_read_page(struct inode *inode, struct page *page)
{
+ struct netfs_cache_resources cres;
struct fscache_cookie *cookie = nfs_i_fscache(inode);
+ struct iov_iter iter;
+ struct bio_vec bvec[1];
+ int ret;
- BUG_ON(!cookie);
-
- dfprintk(FSCACHE, "NFS: fscache invalidatepage (0x%p/0x%p/0x%p)\n",
- cookie, page, NFS_I(inode));
-
- fscache_wait_on_page_write(cookie, page);
+ memset(&cres, 0, sizeof(cres));
+ bvec[0].bv_page = page;
+ bvec[0].bv_offset = 0;
+ bvec[0].bv_len = PAGE_SIZE;
+ iov_iter_bvec(&iter, READ, bvec, ARRAY_SIZE(bvec), PAGE_SIZE);
- BUG_ON(!PageLocked(page));
- fscache_uncache_page(cookie, page);
- nfs_inc_fscache_stats(page->mapping->host,
- NFSIOS_FSCACHE_PAGES_UNCACHED);
-}
+ ret = fscache_begin_read_operation(&cres, cookie);
+ if (ret < 0)
+ return ret;
-/*
- * Handle completion of a page being read from the cache.
- * - Called in process (keventd) context.
- */
-static void nfs_readpage_from_fscache_complete(struct page *page,
- void *context,
- int error)
-{
- dfprintk(FSCACHE,
- "NFS: readpage_from_fscache_complete (0x%p/0x%p/%d)\n",
- page, context, error);
-
- /*
- * If the read completes with an error, mark the page with PG_checked,
- * unlock the page, and let the VM reissue the readpage.
- */
- if (!error)
- SetPageUptodate(page);
- else
- SetPageChecked(page);
- unlock_page(page);
+ ret = fscache_read(&cres, page_offset(page), &iter, NETFS_READ_HOLE_FAIL,
+ NULL, NULL);
+ fscache_end_operation(&cres);
+ return ret;
}
/*
- * Retrieve a page from fscache
+ * Fallback page writing interface.
*/
-int __nfs_readpage_from_fscache(struct nfs_open_context *ctx,
- struct inode *inode, struct page *page)
+static int fscache_fallback_write_page(struct inode *inode, struct page *page,
+ bool no_space_allocated_yet)
{
+ struct netfs_cache_resources cres;
+ struct fscache_cookie *cookie = nfs_i_fscache(inode);
+ struct iov_iter iter;
+ struct bio_vec bvec[1];
+ loff_t start = page_offset(page);
+ size_t len = PAGE_SIZE;
int ret;
- dfprintk(FSCACHE,
- "NFS: readpage_from_fscache(fsc:%p/p:%p(i:%lx f:%lx)/0x%p)\n",
- nfs_i_fscache(inode), page, page->index, page->flags, inode);
+ memset(&cres, 0, sizeof(cres));
+ bvec[0].bv_page = page;
+ bvec[0].bv_offset = 0;
+ bvec[0].bv_len = PAGE_SIZE;
+ iov_iter_bvec(&iter, WRITE, bvec, ARRAY_SIZE(bvec), PAGE_SIZE);
- if (PageChecked(page)) {
- ClearPageChecked(page);
- return 1;
- }
-
- ret = fscache_read_or_alloc_page(nfs_i_fscache(inode),
- page,
- nfs_readpage_from_fscache_complete,
- ctx,
- GFP_KERNEL);
-
- switch (ret) {
- case 0: /* read BIO submitted (page in fscache) */
- dfprintk(FSCACHE,
- "NFS: readpage_from_fscache: BIO submitted\n");
- nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK);
+ ret = fscache_begin_write_operation(&cres, cookie);
+ if (ret < 0)
return ret;
- case -ENOBUFS: /* inode not in cache */
- case -ENODATA: /* page not in cache */
- nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL);
- dfprintk(FSCACHE,
- "NFS: readpage_from_fscache %d\n", ret);
- return 1;
-
- default:
- dfprintk(FSCACHE, "NFS: readpage_from_fscache %d\n", ret);
- nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL);
- }
+ ret = cres.ops->prepare_write(&cres, &start, &len, i_size_read(inode),
+ no_space_allocated_yet);
+ if (ret == 0)
+ ret = fscache_write(&cres, page_offset(page), &iter, NULL, NULL);
+ fscache_end_operation(&cres);
return ret;
}
/*
- * Retrieve a set of pages from fscache
+ * Retrieve a page from fscache
*/
-int __nfs_readpages_from_fscache(struct nfs_open_context *ctx,
- struct inode *inode,
- struct address_space *mapping,
- struct list_head *pages,
- unsigned *nr_pages)
+int __nfs_fscache_read_page(struct inode *inode, struct page *page)
{
- unsigned npages = *nr_pages;
int ret;
- dfprintk(FSCACHE, "NFS: nfs_getpages_from_fscache (0x%p/%u/0x%p)\n",
- nfs_i_fscache(inode), npages, inode);
-
- ret = fscache_read_or_alloc_pages(nfs_i_fscache(inode),
- mapping, pages, nr_pages,
- nfs_readpage_from_fscache_complete,
- ctx,
- mapping_gfp_mask(mapping));
- if (*nr_pages < npages)
- nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK,
- npages);
- if (*nr_pages > 0)
- nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL,
- *nr_pages);
-
- switch (ret) {
- case 0: /* read submitted to the cache for all pages */
- BUG_ON(!list_empty(pages));
- BUG_ON(*nr_pages != 0);
- dfprintk(FSCACHE,
- "NFS: nfs_getpages_from_fscache: submitted\n");
-
- return ret;
-
- case -ENOBUFS: /* some pages aren't cached and can't be */
- case -ENODATA: /* some pages aren't cached */
- dfprintk(FSCACHE,
- "NFS: nfs_getpages_from_fscache: no page: %d\n", ret);
- return 1;
+ trace_nfs_fscache_read_page(inode, page);
+ if (PageChecked(page)) {
+ ClearPageChecked(page);
+ ret = 1;
+ goto out;
+ }
- default:
- dfprintk(FSCACHE,
- "NFS: nfs_getpages_from_fscache: ret %d\n", ret);
+ ret = fscache_fallback_read_page(inode, page);
+ if (ret < 0) {
+ nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL);
+ SetPageChecked(page);
+ goto out;
}
+ /* Read completed synchronously */
+ nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK);
+ SetPageUptodate(page);
+ ret = 0;
+out:
+ trace_nfs_fscache_read_page_exit(inode, page, ret);
return ret;
}
/*
- * Store a newly fetched page in fscache
- * - PG_fscache must be set on the page
+ * Store a newly fetched page in fscache. We can be certain there's no page
+ * stored in the cache as yet otherwise we would've read it from there.
*/
-void __nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync)
+void __nfs_fscache_write_page(struct inode *inode, struct page *page)
{
int ret;
- dfprintk(FSCACHE,
- "NFS: readpage_to_fscache(fsc:%p/p:%p(i:%lx f:%lx)/%d)\n",
- nfs_i_fscache(inode), page, page->index, page->flags, sync);
+ trace_nfs_fscache_write_page(inode, page);
- ret = fscache_write_page(nfs_i_fscache(inode), page,
- inode->i_size, GFP_KERNEL);
- dfprintk(FSCACHE,
- "NFS: readpage_to_fscache: p:%p(i:%lu f:%lx) ret %d\n",
- page, page->index, page->flags, ret);
+ ret = fscache_fallback_write_page(inode, page, true);
if (ret != 0) {
- fscache_uncache_page(nfs_i_fscache(inode), page);
- nfs_inc_fscache_stats(inode,
- NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL);
+ nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL);
nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_UNCACHED);
} else {
- nfs_inc_fscache_stats(inode,
- NFSIOS_FSCACHE_PAGES_WRITTEN_OK);
+ nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_WRITTEN_OK);
}
+ trace_nfs_fscache_write_page_exit(inode, page, ret);
}
diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h
index 6754c8607230..2a37af880978 100644
--- a/fs/nfs/fscache.h
+++ b/fs/nfs/fscache.h
@@ -8,51 +8,16 @@
#ifndef _NFS_FSCACHE_H
#define _NFS_FSCACHE_H
+#include <linux/swap.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/nfs4_mount.h>
#include <linux/fscache.h>
+#include <linux/iversion.h>
#ifdef CONFIG_NFS_FSCACHE
/*
- * set of NFS FS-Cache objects that form a superblock key
- */
-struct nfs_fscache_key {
- struct rb_node node;
- struct nfs_client *nfs_client; /* the server */
-
- /* the elements of the unique key - as used by nfs_compare_super() and
- * nfs_compare_mount_options() to distinguish superblocks */
- struct {
- struct {
- unsigned long s_flags; /* various flags
- * (& NFS_MS_MASK) */
- } super;
-
- struct {
- struct nfs_fsid fsid;
- int flags;
- unsigned int rsize; /* read size */
- unsigned int wsize; /* write size */
- unsigned int acregmin; /* attr cache timeouts */
- unsigned int acregmax;
- unsigned int acdirmin;
- unsigned int acdirmax;
- } nfs_server;
-
- struct {
- rpc_authflavor_t au_flavor;
- } rpc_auth;
-
- /* uniquifier - can be used if nfs_server.flags includes
- * NFS_MOUNT_UNSHARED */
- u8 uniq_len;
- char uniquifier[0];
- } key;
-};
-
-/*
* Definition of the auxiliary data attached to NFS inode storage objects
* within the cache.
*
@@ -70,84 +35,39 @@ struct nfs_fscache_inode_auxdata {
};
/*
- * fscache-index.c
- */
-extern struct fscache_netfs nfs_fscache_netfs;
-extern const struct fscache_cookie_def nfs_fscache_server_index_def;
-extern const struct fscache_cookie_def nfs_fscache_super_index_def;
-extern const struct fscache_cookie_def nfs_fscache_inode_object_def;
-
-extern int nfs_fscache_register(void);
-extern void nfs_fscache_unregister(void);
-
-/*
* fscache.c
*/
-extern void nfs_fscache_get_client_cookie(struct nfs_client *);
-extern void nfs_fscache_release_client_cookie(struct nfs_client *);
-
-extern void nfs_fscache_get_super_cookie(struct super_block *, const char *, int);
+extern int nfs_fscache_get_super_cookie(struct super_block *, const char *, int);
extern void nfs_fscache_release_super_cookie(struct super_block *);
extern void nfs_fscache_init_inode(struct inode *);
extern void nfs_fscache_clear_inode(struct inode *);
extern void nfs_fscache_open_file(struct inode *, struct file *);
+extern void nfs_fscache_release_file(struct inode *, struct file *);
-extern void __nfs_fscache_invalidate_page(struct page *, struct inode *);
-extern int nfs_fscache_release_page(struct page *, gfp_t);
+extern int __nfs_fscache_read_page(struct inode *, struct page *);
+extern void __nfs_fscache_write_page(struct inode *, struct page *);
-extern int __nfs_readpage_from_fscache(struct nfs_open_context *,
- struct inode *, struct page *);
-extern int __nfs_readpages_from_fscache(struct nfs_open_context *,
- struct inode *, struct address_space *,
- struct list_head *, unsigned *);
-extern void __nfs_readpage_to_fscache(struct inode *, struct page *, int);
-
-/*
- * wait for a page to complete writing to the cache
- */
-static inline void nfs_fscache_wait_on_page_write(struct nfs_inode *nfsi,
- struct page *page)
-{
- if (PageFsCache(page))
- fscache_wait_on_page_write(nfsi->fscache, page);
-}
-
-/*
- * release the caching state associated with a page if undergoing complete page
- * invalidation
- */
-static inline void nfs_fscache_invalidate_page(struct page *page,
- struct inode *inode)
+static inline bool nfs_fscache_release_folio(struct folio *folio, gfp_t gfp)
{
- if (PageFsCache(page))
- __nfs_fscache_invalidate_page(page, inode);
+ if (folio_test_fscache(folio)) {
+ if (current_is_kswapd() || !(gfp & __GFP_FS))
+ return false;
+ folio_wait_fscache(folio);
+ fscache_note_page_release(nfs_i_fscache(folio->mapping->host));
+ nfs_inc_fscache_stats(folio->mapping->host,
+ NFSIOS_FSCACHE_PAGES_UNCACHED);
+ }
+ return true;
}
/*
* Retrieve a page from an inode data storage object.
*/
-static inline int nfs_readpage_from_fscache(struct nfs_open_context *ctx,
- struct inode *inode,
- struct page *page)
+static inline int nfs_fscache_read_page(struct inode *inode, struct page *page)
{
- if (NFS_I(inode)->fscache)
- return __nfs_readpage_from_fscache(ctx, inode, page);
- return -ENOBUFS;
-}
-
-/*
- * Retrieve a set of pages from an inode data storage object.
- */
-static inline int nfs_readpages_from_fscache(struct nfs_open_context *ctx,
- struct inode *inode,
- struct address_space *mapping,
- struct list_head *pages,
- unsigned *nr_pages)
-{
- if (NFS_I(inode)->fscache)
- return __nfs_readpages_from_fscache(ctx, inode, mapping, pages,
- nr_pages);
+ if (nfs_i_fscache(inode))
+ return __nfs_fscache_read_page(inode, page);
return -ENOBUFS;
}
@@ -155,28 +75,39 @@ static inline int nfs_readpages_from_fscache(struct nfs_open_context *ctx,
* Store a page newly fetched from the server in an inode data storage object
* in the cache.
*/
-static inline void nfs_readpage_to_fscache(struct inode *inode,
- struct page *page,
- int sync)
+static inline void nfs_fscache_write_page(struct inode *inode,
+ struct page *page)
{
- if (PageFsCache(page))
- __nfs_readpage_to_fscache(inode, page, sync);
+ if (nfs_i_fscache(inode))
+ __nfs_fscache_write_page(inode, page);
}
-/*
- * Invalidate the contents of fscache for this inode. This will not sleep.
- */
-static inline void nfs_fscache_invalidate(struct inode *inode)
+static inline void nfs_fscache_update_auxdata(struct nfs_fscache_inode_auxdata *auxdata,
+ struct inode *inode)
{
- fscache_invalidate(NFS_I(inode)->fscache);
+ memset(auxdata, 0, sizeof(*auxdata));
+ auxdata->mtime_sec = inode->i_mtime.tv_sec;
+ auxdata->mtime_nsec = inode->i_mtime.tv_nsec;
+ auxdata->ctime_sec = inode->i_ctime.tv_sec;
+ auxdata->ctime_nsec = inode->i_ctime.tv_nsec;
+
+ if (NFS_SERVER(inode)->nfs_client->rpc_ops->version == 4)
+ auxdata->change_attr = inode_peek_iversion_raw(inode);
}
/*
- * Wait for an object to finish being invalidated.
+ * Invalidate the contents of fscache for this inode. This will not sleep.
*/
-static inline void nfs_fscache_wait_on_invalidate(struct inode *inode)
+static inline void nfs_fscache_invalidate(struct inode *inode, int flags)
{
- fscache_wait_on_invalidate(NFS_I(inode)->fscache);
+ struct nfs_fscache_inode_auxdata auxdata;
+ struct nfs_inode *nfsi = NFS_I(inode);
+
+ if (nfsi->fscache) {
+ nfs_fscache_update_auxdata(&auxdata, inode);
+ fscache_invalidate(nfsi->fscache, &auxdata,
+ i_size_read(inode), flags);
+ }
}
/*
@@ -190,48 +121,24 @@ static inline const char *nfs_server_fscache_state(struct nfs_server *server)
}
#else /* CONFIG_NFS_FSCACHE */
-static inline int nfs_fscache_register(void) { return 0; }
-static inline void nfs_fscache_unregister(void) {}
-
-static inline void nfs_fscache_get_client_cookie(struct nfs_client *clp) {}
-static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) {}
-
static inline void nfs_fscache_release_super_cookie(struct super_block *sb) {}
static inline void nfs_fscache_init_inode(struct inode *inode) {}
static inline void nfs_fscache_clear_inode(struct inode *inode) {}
static inline void nfs_fscache_open_file(struct inode *inode,
struct file *filp) {}
+static inline void nfs_fscache_release_file(struct inode *inode, struct file *file) {}
-static inline int nfs_fscache_release_page(struct page *page, gfp_t gfp)
-{
- return 1; /* True: may release page */
-}
-static inline void nfs_fscache_invalidate_page(struct page *page,
- struct inode *inode) {}
-static inline void nfs_fscache_wait_on_page_write(struct nfs_inode *nfsi,
- struct page *page) {}
-
-static inline int nfs_readpage_from_fscache(struct nfs_open_context *ctx,
- struct inode *inode,
- struct page *page)
+static inline bool nfs_fscache_release_folio(struct folio *folio, gfp_t gfp)
{
- return -ENOBUFS;
+ return true; /* may release folio */
}
-static inline int nfs_readpages_from_fscache(struct nfs_open_context *ctx,
- struct inode *inode,
- struct address_space *mapping,
- struct list_head *pages,
- unsigned *nr_pages)
+static inline int nfs_fscache_read_page(struct inode *inode, struct page *page)
{
return -ENOBUFS;
}
-static inline void nfs_readpage_to_fscache(struct inode *inode,
- struct page *page, int sync) {}
-
-
-static inline void nfs_fscache_invalidate(struct inode *inode) {}
-static inline void nfs_fscache_wait_on_invalidate(struct inode *inode) {}
+static inline void nfs_fscache_write_page(struct inode *inode, struct page *page) {}
+static inline void nfs_fscache_invalidate(struct inode *inode, int flags) {}
static inline const char *nfs_server_fscache_state(struct nfs_server *server)
{
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index fda530d5e764..6b2cfa59a1a2 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -72,18 +72,13 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
return nfs_fileid_to_ino_t(fattr->fileid);
}
-static int nfs_wait_killable(int mode)
+int nfs_wait_bit_killable(struct wait_bit_key *key, int mode)
{
- freezable_schedule_unsafe();
+ schedule();
if (signal_pending_state(mode, current))
return -ERESTARTSYS;
return 0;
}
-
-int nfs_wait_bit_killable(struct wait_bit_key *key, int mode)
-{
- return nfs_wait_killable(mode);
-}
EXPORT_SYMBOL_GPL(nfs_wait_bit_killable);
/**
@@ -203,14 +198,13 @@ void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
NFS_INO_INVALID_OTHER |
NFS_INO_INVALID_XATTR);
flags &= ~(NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE);
- } else if (flags & NFS_INO_REVAL_PAGECACHE)
- flags |= NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE;
+ }
if (!nfs_has_xattr_cache(nfsi))
flags &= ~NFS_INO_INVALID_XATTR;
if (flags & NFS_INO_INVALID_DATA)
- nfs_fscache_invalidate(inode);
- flags &= ~(NFS_INO_REVAL_PAGECACHE | NFS_INO_REVAL_FORCED);
+ nfs_fscache_invalidate(inode, 0);
+ flags &= ~NFS_INO_REVAL_FORCED;
nfsi->cache_validity |= flags;
@@ -236,19 +230,17 @@ static void nfs_zap_caches_locked(struct inode *inode)
nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
nfsi->attrtimeo_timestamp = jiffies;
- if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
- nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR
- | NFS_INO_INVALID_DATA
- | NFS_INO_INVALID_ACCESS
- | NFS_INO_INVALID_ACL
- | NFS_INO_INVALID_XATTR
- | NFS_INO_REVAL_PAGECACHE);
- } else
- nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR
- | NFS_INO_INVALID_ACCESS
- | NFS_INO_INVALID_ACL
- | NFS_INO_INVALID_XATTR
- | NFS_INO_REVAL_PAGECACHE);
+ if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR |
+ NFS_INO_INVALID_DATA |
+ NFS_INO_INVALID_ACCESS |
+ NFS_INO_INVALID_ACL |
+ NFS_INO_INVALID_XATTR);
+ else
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR |
+ NFS_INO_INVALID_ACCESS |
+ NFS_INO_INVALID_ACL |
+ NFS_INO_INVALID_XATTR);
nfs_zap_label_cache_locked(nfsi);
}
@@ -321,7 +313,7 @@ struct nfs_find_desc {
static int
nfs_find_actor(struct inode *inode, void *opaque)
{
- struct nfs_find_desc *desc = (struct nfs_find_desc *)opaque;
+ struct nfs_find_desc *desc = opaque;
struct nfs_fh *fh = desc->fh;
struct nfs_fattr *fattr = desc->fattr;
@@ -339,7 +331,7 @@ nfs_find_actor(struct inode *inode, void *opaque)
static int
nfs_init_locked(struct inode *inode, void *opaque)
{
- struct nfs_find_desc *desc = (struct nfs_find_desc *)opaque;
+ struct nfs_find_desc *desc = opaque;
struct nfs_fattr *fattr = desc->fattr;
set_nfs_fileid(inode, fattr->fileid);
@@ -429,6 +421,7 @@ nfs_ilookup(struct super_block *sb, struct nfs_fattr *fattr, struct nfs_fh *fh)
static void nfs_inode_init_regular(struct nfs_inode *nfsi)
{
atomic_long_set(&nfsi->nrequests, 0);
+ atomic_long_set(&nfsi->redirtied_pages, 0);
INIT_LIST_HEAD(&nfsi->commit_info.list);
atomic_long_set(&nfsi->commit_info.ncommit, 0);
atomic_set(&nfsi->commit_info.rpcs_out, 0);
@@ -564,8 +557,6 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
inode->i_gid = fattr->gid;
else if (fattr_supported & NFS_ATTR_FATTR_GROUP)
nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
- if (nfs_server_capable(inode, NFS_CAP_XATTR))
- nfs_set_cache_invalid(inode, NFS_INO_INVALID_XATTR);
if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
inode->i_blocks = fattr->du.nfs2.blocks;
else if (fattr_supported & NFS_ATTR_FATTR_BLOCKS_USED &&
@@ -785,26 +776,32 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
}
EXPORT_SYMBOL_GPL(nfs_setattr_update_inode);
-static void nfs_readdirplus_parent_cache_miss(struct dentry *dentry)
+/*
+ * Don't request help from readdirplus if the file is being written to,
+ * or if attribute caching is turned off
+ */
+static bool nfs_getattr_readdirplus_enable(const struct inode *inode)
{
- struct dentry *parent;
+ return nfs_server_capable(inode, NFS_CAP_READDIRPLUS) &&
+ !nfs_have_writebacks(inode) && NFS_MAXATTRTIMEO(inode) > 5 * HZ;
+}
- if (!nfs_server_capable(d_inode(dentry), NFS_CAP_READDIRPLUS))
- return;
- parent = dget_parent(dentry);
- nfs_force_use_readdirplus(d_inode(parent));
- dput(parent);
+static void nfs_readdirplus_parent_cache_miss(struct dentry *dentry)
+{
+ if (!IS_ROOT(dentry)) {
+ struct dentry *parent = dget_parent(dentry);
+ nfs_readdir_record_entry_cache_miss(d_inode(parent));
+ dput(parent);
+ }
}
static void nfs_readdirplus_parent_cache_hit(struct dentry *dentry)
{
- struct dentry *parent;
-
- if (!nfs_server_capable(d_inode(dentry), NFS_CAP_READDIRPLUS))
- return;
- parent = dget_parent(dentry);
- nfs_advise_use_readdirplus(d_inode(parent));
- dput(parent);
+ if (!IS_ROOT(dentry)) {
+ struct dentry *parent = dget_parent(dentry);
+ nfs_readdir_record_entry_cache_hit(d_inode(parent));
+ dput(parent);
+ }
}
static u32 nfs_get_valid_attrmask(struct inode *inode)
@@ -840,6 +837,7 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
int err = 0;
bool force_sync = query_flags & AT_STATX_FORCE_SYNC;
bool do_update = false;
+ bool readdirplus_enabled = nfs_getattr_readdirplus_enable(inode);
trace_nfs_getattr_enter(inode);
@@ -848,17 +846,15 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
STATX_INO | STATX_SIZE | STATX_BLOCKS;
if ((query_flags & AT_STATX_DONT_SYNC) && !force_sync) {
- nfs_readdirplus_parent_cache_hit(path->dentry);
+ if (readdirplus_enabled)
+ nfs_readdirplus_parent_cache_hit(path->dentry);
goto out_no_revalidate;
}
/* Flush out writes to the server in order to update c/mtime. */
- if ((request_mask & (STATX_CTIME|STATX_MTIME)) &&
- S_ISREG(inode->i_mode)) {
- err = filemap_write_and_wait(inode->i_mapping);
- if (err)
- goto out;
- }
+ if ((request_mask & (STATX_CTIME | STATX_MTIME)) &&
+ S_ISREG(inode->i_mode))
+ filemap_write_and_wait(inode->i_mapping);
/*
* We may force a getattr if the user cares about atime.
@@ -901,15 +897,12 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
do_update |= cache_validity & NFS_INO_INVALID_BLOCKS;
if (do_update) {
- /* Update the attribute cache */
- if (!(server->flags & NFS_MOUNT_NOAC))
+ if (readdirplus_enabled)
nfs_readdirplus_parent_cache_miss(path->dentry);
- else
- nfs_readdirplus_parent_cache_hit(path->dentry);
err = __nfs_revalidate_inode(server, inode);
if (err)
goto out;
- } else
+ } else if (readdirplus_enabled)
nfs_readdirplus_parent_cache_hit(path->dentry);
out_no_revalidate:
/* Only return attributes that were revalidated. */
@@ -955,7 +948,7 @@ struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx)
res = __nfs_find_lock_context(ctx);
rcu_read_unlock();
if (res == NULL) {
- new = kmalloc(sizeof(*new), GFP_KERNEL);
+ new = kmalloc(sizeof(*new), GFP_KERNEL_ACCOUNT);
if (new == NULL)
return ERR_PTR(-ENOMEM);
nfs_init_lock_context(new);
@@ -1033,7 +1026,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry,
{
struct nfs_open_context *ctx;
- ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+ ctx = kmalloc(sizeof(*ctx), GFP_KERNEL_ACCOUNT);
if (!ctx)
return ERR_PTR(-ENOMEM);
nfs_sb_active(dentry->d_sb);
@@ -1183,7 +1176,6 @@ int nfs_open(struct inode *inode, struct file *filp)
nfs_fscache_open_file(inode, filp);
return 0;
}
-EXPORT_SYMBOL_GPL(nfs_open);
/*
* This function is called whenever some part of NFS notices that
@@ -1289,6 +1281,7 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map
{
int ret;
+ nfs_fscache_invalidate(inode, 0);
if (mapping->nrpages != 0) {
if (S_ISREG(inode->i_mode)) {
ret = nfs_sync_mapping(mapping);
@@ -1300,7 +1293,6 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map
return ret;
}
nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE);
- nfs_fscache_wait_on_invalidate(inode);
dfprintk(PAGECACHE, "NFS: (%s/%Lu) data cache invalidated\n",
inode->i_sb->s_id,
@@ -1335,7 +1327,8 @@ int nfs_clear_invalid_mapping(struct address_space *mapping)
*/
for (;;) {
ret = wait_on_bit_action(bitlock, NFS_INO_INVALIDATING,
- nfs_wait_bit_killable, TASK_KILLABLE);
+ nfs_wait_bit_killable,
+ TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
if (ret)
goto out;
spin_lock(&inode->i_lock);
@@ -1586,7 +1579,7 @@ struct nfs_fattr *nfs_alloc_fattr(void)
{
struct nfs_fattr *fattr;
- fattr = kmalloc(sizeof(*fattr), GFP_NOFS);
+ fattr = kmalloc(sizeof(*fattr), GFP_KERNEL);
if (fattr != NULL) {
nfs_fattr_init(fattr);
fattr->label = NULL;
@@ -1602,7 +1595,7 @@ struct nfs_fattr *nfs_alloc_fattr_with_label(struct nfs_server *server)
if (!fattr)
return NULL;
- fattr->label = nfs4_label_alloc(server, GFP_NOFS);
+ fattr->label = nfs4_label_alloc(server, GFP_KERNEL);
if (IS_ERR(fattr->label)) {
kfree(fattr);
return NULL;
@@ -1616,7 +1609,7 @@ struct nfs_fh *nfs_alloc_fhandle(void)
{
struct nfs_fh *fh;
- fh = kmalloc(sizeof(struct nfs_fh), GFP_NOFS);
+ fh = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL);
if (fh != NULL)
fh->size = 0;
return fh;
@@ -2241,7 +2234,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
struct inode *nfs_alloc_inode(struct super_block *sb)
{
struct nfs_inode *nfsi;
- nfsi = kmem_cache_alloc(nfs_inode_cachep, GFP_KERNEL);
+ nfsi = alloc_inode_sb(sb, nfs_inode_cachep, GFP_KERNEL);
if (!nfsi)
return NULL;
nfsi->flags = 0UL;
@@ -2274,7 +2267,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
static void init_once(void *foo)
{
- struct nfs_inode *nfsi = (struct nfs_inode *) foo;
+ struct nfs_inode *nfsi = foo;
inode_init_once(&nfsi->vfs_inode);
INIT_LIST_HEAD(&nfsi->open_files);
@@ -2374,10 +2367,6 @@ static int __init init_nfs_fs(void)
if (err < 0)
goto out9;
- err = nfs_fscache_register();
- if (err < 0)
- goto out8;
-
err = nfsiod_start();
if (err)
goto out7;
@@ -2429,8 +2418,6 @@ out5:
out6:
nfsiod_stop();
out7:
- nfs_fscache_unregister();
-out8:
unregister_pernet_subsys(&nfs_net_ops);
out9:
nfs_sysfs_exit();
@@ -2445,7 +2432,6 @@ static void __exit exit_nfs_fs(void)
nfs_destroy_readpagecache();
nfs_destroy_inodecache();
nfs_destroy_nfspagecache();
- nfs_fscache_unregister();
unregister_pernet_subsys(&nfs_net_ops);
rpc_proc_unregister(&init_net, "nfs");
unregister_nfs_fs();
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 12f6acb483bb..647fc3f547cb 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -42,6 +42,16 @@ static inline bool nfs_lookup_is_soft_revalidate(const struct dentry *dentry)
return true;
}
+static inline fmode_t flags_to_mode(int flags)
+{
+ fmode_t res = (__force fmode_t)flags & FMODE_EXEC;
+ if ((flags & O_ACCMODE) != O_WRONLY)
+ res |= FMODE_READ;
+ if ((flags & O_ACCMODE) != O_RDONLY)
+ res |= FMODE_WRITE;
+ return res;
+}
+
/*
* Note: RFC 1813 doesn't limit the number of auth flavors that
* a server can return, so make something up.
@@ -59,7 +69,7 @@ static inline bool nfs_lookup_is_soft_revalidate(const struct dentry *dentry)
struct nfs_client_initdata {
unsigned long init_flags;
const char *hostname; /* Hostname of the server */
- const struct sockaddr *addr; /* Address of the server */
+ const struct sockaddr_storage *addr; /* Address of the server */
const char *nodename; /* Hostname of the client */
const char *ip_addr; /* IP address of the client */
size_t addrlen;
@@ -170,7 +180,7 @@ static inline struct nfs_fs_context *nfs_fc2context(const struct fs_context *fc)
/* mount_clnt.c */
struct nfs_mount_request {
- struct sockaddr *sap;
+ struct sockaddr_storage *sap;
size_t salen;
char *hostname;
char *dirpath;
@@ -213,7 +223,7 @@ extern void nfs4_server_set_init_caps(struct nfs_server *);
extern struct nfs_server *nfs4_create_server(struct fs_context *);
extern struct nfs_server *nfs4_create_referral_server(struct fs_context *);
extern int nfs4_update_server(struct nfs_server *server, const char *hostname,
- struct sockaddr *sap, size_t salen,
+ struct sockaddr_storage *sap, size_t salen,
struct net *net);
extern void nfs_free_server(struct nfs_server *server);
extern struct nfs_server *nfs_clone_server(struct nfs_server *,
@@ -225,7 +235,7 @@ extern int nfs_client_init_status(const struct nfs_client *clp);
extern int nfs_wait_client_init_complete(const struct nfs_client *clp);
extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
extern struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
- const struct sockaddr *ds_addr,
+ const struct sockaddr_storage *ds_addr,
int ds_addrlen, int ds_proto,
unsigned int ds_timeo,
unsigned int ds_retrans,
@@ -233,7 +243,7 @@ extern struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
extern struct rpc_clnt *nfs4_find_or_create_ds_client(struct nfs_client *,
struct inode *);
extern struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
- const struct sockaddr *ds_addr, int ds_addrlen,
+ const struct sockaddr_storage *ds_addr, int ds_addrlen,
int ds_proto, unsigned int ds_timeo,
unsigned int ds_retrans);
#ifdef CONFIG_PROC_FS
@@ -366,13 +376,14 @@ extern struct nfs_client *nfs_init_client(struct nfs_client *clp,
const struct nfs_client_initdata *);
/* dir.c */
-extern void nfs_advise_use_readdirplus(struct inode *dir);
-extern void nfs_force_use_readdirplus(struct inode *dir);
+extern void nfs_readdir_record_entry_cache_hit(struct inode *dir);
+extern void nfs_readdir_record_entry_cache_miss(struct inode *dir);
extern unsigned long nfs_access_cache_count(struct shrinker *shrink,
struct shrink_control *sc);
extern unsigned long nfs_access_cache_scan(struct shrinker *shrink,
struct shrink_control *sc);
struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int);
+void nfs_d_prune_case_insensitive_aliases(struct inode *inode);
int nfs_create(struct user_namespace *, struct inode *, struct dentry *,
umode_t, bool);
int nfs_mkdir(struct user_namespace *, struct inode *, struct dentry *,
@@ -387,6 +398,20 @@ int nfs_mknod(struct user_namespace *, struct inode *, struct dentry *, umode_t,
int nfs_rename(struct user_namespace *, struct inode *, struct dentry *,
struct inode *, struct dentry *, unsigned int);
+#ifdef CONFIG_NFS_V4_2
+static inline __u32 nfs_access_xattr_mask(const struct nfs_server *server)
+{
+ if (!(server->caps & NFS_CAP_XATTR))
+ return 0;
+ return NFS4_ACCESS_XAREAD | NFS4_ACCESS_XAWRITE | NFS4_ACCESS_XALIST;
+}
+#else
+static inline __u32 nfs_access_xattr_mask(const struct nfs_server *server)
+{
+ return 0;
+}
+#endif
+
/* file.c */
int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync);
loff_t nfs_file_llseek(struct file *, loff_t, int);
@@ -410,7 +435,6 @@ extern void nfs_zap_acl_cache(struct inode *inode);
extern void nfs_set_cache_invalid(struct inode *inode, unsigned long flags);
extern bool nfs_check_cache_invalid(struct inode *, unsigned long);
extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode);
-extern int nfs_wait_atomic_killable(atomic_t *p, unsigned int mode);
/* super.c */
extern const struct super_operations nfs_sops;
@@ -478,7 +502,6 @@ extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
const struct nfs_pgio_completion_ops *compl_ops);
extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
extern void nfs_commit_free(struct nfs_commit_data *p);
-extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
extern void nfs_commit_prepare(struct rpc_task *task, void *calldata);
extern int nfs_initiate_commit(struct rpc_clnt *clnt,
struct nfs_commit_data *data,
@@ -553,8 +576,10 @@ void nfs_clear_pnfs_ds_commit_verifiers(struct pnfs_ds_commit_info *cinfo)
#endif
#ifdef CONFIG_MIGRATION
-extern int nfs_migrate_page(struct address_space *,
- struct page *, struct page *, enum migrate_mode);
+int nfs_migrate_folio(struct address_space *, struct folio *dst,
+ struct folio *src, enum migrate_mode);
+#else
+#define nfs_migrate_folio NULL
#endif
static inline int
@@ -572,6 +597,38 @@ nfs_write_match_verf(const struct nfs_writeverf *verf,
!nfs_write_verifier_cmp(&req->wb_verf, &verf->verifier);
}
+static inline gfp_t nfs_io_gfp_mask(void)
+{
+ if (current->flags & PF_WQ_WORKER)
+ return GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
+ return GFP_KERNEL;
+}
+
+/*
+ * Special version of should_remove_suid() that ignores capabilities.
+ */
+static inline int nfs_should_remove_suid(const struct inode *inode)
+{
+ umode_t mode = inode->i_mode;
+ int kill = 0;
+
+ /* suid always must be killed */
+ if (unlikely(mode & S_ISUID))
+ kill = ATTR_KILL_SUID;
+
+ /*
+ * sgid without any exec bits is just a mandatory locking mark; leave
+ * it alone. If some exec bits are set, it's a real sgid; kill it.
+ */
+ if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
+ kill |= ATTR_KILL_SGID;
+
+ if (unlikely(kill && S_ISREG(mode)))
+ return kill;
+
+ return 0;
+}
+
/* unlink.c */
extern struct rpc_task *
nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
@@ -673,6 +730,24 @@ unsigned long nfs_block_size(unsigned long bsize, unsigned char *nrbitsp)
}
/*
+ * Compute and set NFS server rsize / wsize
+ */
+static inline
+unsigned long nfs_io_size(unsigned long iosize, enum xprt_transports proto)
+{
+ if (iosize < NFS_MIN_FILE_IO_SIZE)
+ iosize = NFS_DEF_FILE_IO_SIZE;
+ else if (iosize >= NFS_MAX_FILE_IO_SIZE)
+ iosize = NFS_MAX_FILE_IO_SIZE;
+ else
+ iosize = iosize & PAGE_MASK;
+
+ if (proto == XPRT_TRANSPORT_UDP)
+ return nfs_block_bits(iosize, NULL);
+ return iosize;
+}
+
+/*
* Determine the maximum file size for a superblock
*/
static inline
@@ -809,6 +884,7 @@ static inline bool nfs_error_is_fatal_on_server(int err)
case 0:
case -ERESTARTSYS:
case -EINTR:
+ case -ENOMEM:
return false;
}
return nfs_error_is_fatal(err);
@@ -818,11 +894,44 @@ static inline bool nfs_error_is_fatal_on_server(int err)
* Select between a default port value and a user-specified port value.
* If a zero value is set, then autobind will be used.
*/
-static inline void nfs_set_port(struct sockaddr *sap, int *port,
+static inline void nfs_set_port(struct sockaddr_storage *sap, int *port,
const unsigned short default_port)
{
if (*port == NFS_UNSPEC_PORT)
*port = default_port;
- rpc_set_port(sap, *port);
-}
+ rpc_set_port((struct sockaddr *)sap, *port);
+}
+
+struct nfs_direct_req {
+ struct kref kref; /* release manager */
+
+ /* I/O parameters */
+ struct nfs_open_context *ctx; /* file open context info */
+ struct nfs_lock_context *l_ctx; /* Lock context info */
+ struct kiocb * iocb; /* controlling i/o request */
+ struct inode * inode; /* target file of i/o */
+
+ /* completion state */
+ atomic_t io_count; /* i/os we're waiting for */
+ spinlock_t lock; /* protect completion state */
+
+ loff_t io_start; /* Start offset for I/O */
+ ssize_t count, /* bytes actually processed */
+ max_count, /* max expected count */
+ bytes_left, /* bytes left to be sent */
+ error; /* any reported error */
+ struct completion completion; /* wait for i/o completion */
+
+ /* commit state */
+ struct nfs_mds_commit_info mds_cinfo; /* Storage for cinfo */
+ struct pnfs_ds_commit_info ds_cinfo; /* Storage for cinfo */
+ struct work_struct work;
+ int flags;
+ /* for write */
+#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */
+#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */
+ /* for read */
+#define NFS_ODIRECT_SHOULD_DIRTY (3) /* dirty user-space page after read */
+#define NFS_ODIRECT_DONE INT_MAX /* write verification failed */
+};
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index c5e3b6b3366a..68e76b626371 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -158,7 +158,7 @@ int nfs_mount(struct nfs_mount_request *info, int timeo, int retrans)
struct rpc_create_args args = {
.net = info->net,
.protocol = info->protocol,
- .address = info->sap,
+ .address = (struct sockaddr *)info->sap,
.addrsize = info->salen,
.timeout = &mnt_timeout,
.servername = info->hostname,
@@ -245,7 +245,7 @@ void nfs_umount(const struct nfs_mount_request *info)
struct rpc_create_args args = {
.net = info->net,
.protocol = IPPROTO_UDP,
- .address = info->sap,
+ .address = (struct sockaddr *)info->sap,
.addrsize = info->salen,
.timeout = &nfs_umnt_timeout,
.servername = info->hostname,
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 3295af4110f1..2f336ace7555 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -175,7 +175,7 @@ struct vfsmount *nfs_d_automount(struct path *path)
}
/* for submounts we want the same server; referrals will reassign */
- memcpy(&ctx->nfs_server.address, &client->cl_addr, client->cl_addrlen);
+ memcpy(&ctx->nfs_server._address, &client->cl_addr, client->cl_addrlen);
ctx->nfs_server.addrlen = client->cl_addrlen;
ctx->nfs_server.port = server->port;
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 7fba7711e6b3..05c3b4b2b3dd 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -949,13 +949,12 @@ int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
error = decode_filename_inline(xdr, &entry->name, &entry->len);
if (unlikely(error))
- return error;
+ return -EAGAIN;
/*
* The type (size and byte order) of nfscookie isn't defined in
* RFC 1094. This implementation assumes that it's an XDR uint32.
*/
- entry->prev_cookie = entry->cookie;
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
return -EAGAIN;
diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c
index 5601e47360c2..669cda757a5c 100644
--- a/fs/nfs/nfs3client.c
+++ b/fs/nfs/nfs3client.c
@@ -78,7 +78,7 @@ struct nfs_server *nfs3_clone_server(struct nfs_server *source,
* the MDS.
*/
struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
- const struct sockaddr *ds_addr, int ds_addrlen,
+ const struct sockaddr_storage *ds_addr, int ds_addrlen,
int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans)
{
struct rpc_timeout ds_timeout;
@@ -98,7 +98,7 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
char buf[INET6_ADDRSTRLEN + 1];
/* fake a hostname because lockd wants it */
- if (rpc_ntop(ds_addr, buf, sizeof(buf)) <= 0)
+ if (rpc_ntop((struct sockaddr *)ds_addr, buf, sizeof(buf)) <= 0)
return ERR_PTR(-EINVAL);
cl_init.hostname = buf;
@@ -108,7 +108,6 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
- __set_bit(NFS_CS_NOPING, &cl_init.init_flags);
__set_bit(NFS_CS_DS, &cl_init.init_flags);
/* Use the MDS nfs_client cl_ipaddr. */
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 7100514d306b..2e7579626cf0 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -36,7 +36,8 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
res = rpc_call_sync(clnt, msg, flags);
if (res != -EJUKEBOX)
break;
- freezable_schedule_timeout_killable_unsafe(NFS_JUKEBOX_RETRY_TIME);
+ __set_current_state(TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
+ schedule_timeout(NFS_JUKEBOX_RETRY_TIME);
res = -ERESTARTSYS;
} while (!fatal_signal_pending(current));
return res;
@@ -220,7 +221,8 @@ static int nfs3_proc_lookupp(struct inode *inode, struct nfs_fh *fhandle,
task_flags);
}
-static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
+static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry,
+ const struct cred *cred)
{
struct nfs3_accessargs arg = {
.fh = NFS_FH(inode),
@@ -231,7 +233,7 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
.rpc_proc = &nfs3_procedures[NFS3PROC_ACCESS],
.rpc_argp = &arg,
.rpc_resp = &res,
- .rpc_cred = entry->cred,
+ .rpc_cred = cred,
};
int status = -ENOMEM;
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 9274c9c5efea..3b0b650c9c5a 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1261,6 +1261,8 @@ static void nfs3_xdr_enc_readdir3args(struct rpc_rqst *req,
static void encode_readdirplus3args(struct xdr_stream *xdr,
const struct nfs3_readdirargs *args)
{
+ uint32_t dircount = args->count;
+ uint32_t maxcount = args->count;
__be32 *p;
encode_nfs_fh3(xdr, args->fh);
@@ -1273,9 +1275,8 @@ static void encode_readdirplus3args(struct xdr_stream *xdr,
* readdirplus: need dircount + buffer size.
* We just make sure we make dircount big enough
*/
- *p++ = cpu_to_be32(args->count >> 3);
-
- *p = cpu_to_be32(args->count);
+ *p++ = cpu_to_be32(dircount);
+ *p = cpu_to_be32(maxcount);
}
static void nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req,
@@ -1967,7 +1968,6 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
bool plus)
{
struct user_namespace *userns = rpc_userns(entry->server->client);
- struct nfs_entry old = *entry;
__be32 *p;
int error;
u64 new_cookie;
@@ -1987,15 +1987,15 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
error = decode_fileid3(xdr, &entry->ino);
if (unlikely(error))
- return error;
+ return -EAGAIN;
error = decode_inline_filename3(xdr, &entry->name, &entry->len);
if (unlikely(error))
- return error;
+ return -EAGAIN;
error = decode_cookie3(xdr, &new_cookie);
if (unlikely(error))
- return error;
+ return -EAGAIN;
entry->d_type = DT_UNKNOWN;
@@ -2003,7 +2003,7 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
entry->fattr->valid = 0;
error = decode_post_op_attr(xdr, entry->fattr, userns);
if (unlikely(error))
- return error;
+ return -EAGAIN;
if (entry->fattr->valid & NFS_ATTR_FATTR_V3)
entry->d_type = nfs_umode_to_dtype(entry->fattr->mode);
@@ -2018,24 +2018,15 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
return -EAGAIN;
if (*p != xdr_zero) {
error = decode_nfs_fh3(xdr, entry->fh);
- if (unlikely(error)) {
- if (error == -E2BIG)
- goto out_truncated;
- return error;
- }
+ if (unlikely(error))
+ return -EAGAIN;
} else
zero_nfs_fh3(entry->fh);
}
- entry->prev_cookie = entry->cookie;
entry->cookie = new_cookie;
return 0;
-
-out_truncated:
- dprintk("NFS: directory entry contains invalid file handle\n");
- *entry = old;
- return -EAGAIN;
}
/*
@@ -2228,6 +2219,7 @@ static int decode_fsinfo3resok(struct xdr_stream *xdr,
/* ignore properties */
result->lease_time = 0;
result->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED;
+ result->xattr_support = 0;
return 0;
}
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 8b21ff1be717..ecb428512fe1 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -46,7 +46,7 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
{
struct inode *inode = file_inode(filep);
struct nfs_server *server = NFS_SERVER(inode);
- u32 bitmask[3];
+ u32 bitmask[NFS_BITMASK_SZ];
struct nfs42_falloc_args args = {
.falloc_fh = NFS_FH(inode),
.falloc_offset = offset,
@@ -69,9 +69,8 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
return status;
}
- memcpy(bitmask, server->cache_consistency_bitmask, sizeof(bitmask));
- if (server->attr_bitmask[1] & FATTR4_WORD1_SPACE_USED)
- bitmask[1] |= FATTR4_WORD1_SPACE_USED;
+ nfs4_bitmask_set(bitmask, server->cache_consistency_bitmask, inode,
+ NFS_INO_INVALID_BLOCKS);
res.falloc_fattr = nfs_alloc_fattr();
if (!res.falloc_fattr)
@@ -79,10 +78,15 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
status = nfs4_call_sync(server->client, server, msg,
&args.seq_args, &res.seq_res, 0);
- if (status == 0)
+ if (status == 0) {
+ if (nfs_should_remove_suid(inode)) {
+ spin_lock(&inode->i_lock);
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE);
+ spin_unlock(&inode->i_lock);
+ }
status = nfs_post_op_update_inode_force_wcc(inode,
res.falloc_fattr);
-
+ }
if (msg->rpc_proc == &nfs4_procedures[NFSPROC4_CLNT_ALLOCATE])
trace_nfs4_fallocate(inode, &args, status);
else
@@ -176,28 +180,27 @@ static int handle_async_copy(struct nfs42_copy_res *res,
nfs4_stateid *src_stateid,
bool *restart)
{
- struct nfs4_copy_state *copy, *tmp_copy;
+ struct nfs4_copy_state *copy, *tmp_copy = NULL, *iter;
int status = NFS4_OK;
- bool found_pending = false;
struct nfs_open_context *dst_ctx = nfs_file_open_context(dst);
struct nfs_open_context *src_ctx = nfs_file_open_context(src);
- copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS);
+ copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_KERNEL);
if (!copy)
return -ENOMEM;
spin_lock(&dst_server->nfs_client->cl_lock);
- list_for_each_entry(tmp_copy,
+ list_for_each_entry(iter,
&dst_server->nfs_client->pending_cb_stateids,
copies) {
- if (memcmp(&res->write_res.stateid, &tmp_copy->stateid,
+ if (memcmp(&res->write_res.stateid, &iter->stateid,
NFS4_STATEID_SIZE))
continue;
- found_pending = true;
- list_del(&tmp_copy->copies);
+ tmp_copy = iter;
+ list_del(&iter->copies);
break;
}
- if (found_pending) {
+ if (tmp_copy) {
spin_unlock(&dst_server->nfs_client->cl_lock);
kfree(copy);
copy = tmp_copy;
@@ -255,7 +258,7 @@ static int process_copy_commit(struct file *dst, loff_t pos_dst,
struct nfs_commitres cres;
int status = -ENOMEM;
- cres.verf = kzalloc(sizeof(struct nfs_writeverf), GFP_NOFS);
+ cres.verf = kzalloc(sizeof(struct nfs_writeverf), GFP_KERNEL);
if (!cres.verf)
goto out;
@@ -338,7 +341,7 @@ static ssize_t _nfs42_proc_copy(struct file *src,
return status;
}
}
- status = nfs_filemap_write_and_wait_range(file_inode(src)->i_mapping,
+ status = nfs_filemap_write_and_wait_range(src->f_mapping,
pos_src, pos_src + (loff_t)count - 1);
if (status)
return status;
@@ -358,7 +361,7 @@ static ssize_t _nfs42_proc_copy(struct file *src,
res->commit_res.verf = NULL;
if (args->sync) {
res->commit_res.verf =
- kzalloc(sizeof(struct nfs_writeverf), GFP_NOFS);
+ kzalloc(sizeof(struct nfs_writeverf), GFP_KERNEL);
if (!res->commit_res.verf)
return -ENOMEM;
}
@@ -553,7 +556,7 @@ static int nfs42_do_offload_cancel_async(struct file *dst,
if (!(dst_server->caps & NFS_CAP_OFFLOAD_CANCEL))
return -EOPNOTSUPP;
- data = kzalloc(sizeof(struct nfs42_offloadcancel_data), GFP_NOFS);
+ data = kzalloc(sizeof(struct nfs42_offloadcancel_data), GFP_KERNEL);
if (data == NULL)
return -ENOMEM;
@@ -592,8 +595,10 @@ static int _nfs42_proc_copy_notify(struct file *src, struct file *dst,
ctx = get_nfs_open_context(nfs_file_open_context(src));
l_ctx = nfs_get_lock_context(ctx);
- if (IS_ERR(l_ctx))
- return PTR_ERR(l_ctx);
+ if (IS_ERR(l_ctx)) {
+ status = PTR_ERR(l_ctx);
+ goto out;
+ }
status = nfs4_set_rw_stateid(&args->cna_src_stateid, ctx, l_ctx,
FMODE_READ);
@@ -601,7 +606,7 @@ static int _nfs42_proc_copy_notify(struct file *src, struct file *dst,
if (status) {
if (status == -EAGAIN)
status = -NFS4ERR_BAD_STATEID;
- return status;
+ goto out;
}
status = nfs4_call_sync(src_server->client, src_server, &msg,
@@ -610,6 +615,7 @@ static int _nfs42_proc_copy_notify(struct file *src, struct file *dst,
if (status == -ENOTSUPP)
src_server->caps &= ~NFS_CAP_COPY_NOTIFY;
+out:
put_nfs_open_context(nfs_file_open_context(src));
return status;
}
@@ -627,7 +633,7 @@ int nfs42_proc_copy_notify(struct file *src, struct file *dst,
if (!(src_server->caps & NFS_CAP_COPY_NOTIFY))
return -EOPNOTSUPP;
- args = kzalloc(sizeof(struct nfs42_copy_notify_args), GFP_NOFS);
+ args = kzalloc(sizeof(struct nfs42_copy_notify_args), GFP_KERNEL);
if (args == NULL)
return -ENOMEM;
@@ -1015,7 +1021,7 @@ int nfs42_proc_layouterror(struct pnfs_layout_segment *lseg,
return -EOPNOTSUPP;
if (n > NFS42_LAYOUTERROR_MAX)
return -EINVAL;
- data = nfs42_alloc_layouterror_data(lseg, GFP_NOFS);
+ data = nfs42_alloc_layouterror_data(lseg, nfs_io_gfp_mask());
if (!data)
return -ENOMEM;
for (i = 0; i < n; i++) {
@@ -1044,13 +1050,14 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
struct inode *src_inode = file_inode(src_f);
struct inode *dst_inode = file_inode(dst_f);
struct nfs_server *server = NFS_SERVER(dst_inode);
+ __u32 dst_bitmask[NFS_BITMASK_SZ];
struct nfs42_clone_args args = {
.src_fh = NFS_FH(src_inode),
.dst_fh = NFS_FH(dst_inode),
.src_offset = src_offset,
.dst_offset = dst_offset,
.count = count,
- .dst_bitmask = server->cache_consistency_bitmask,
+ .dst_bitmask = dst_bitmask,
};
struct nfs42_clone_res res = {
.server = server,
@@ -1079,10 +1086,16 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
if (!res.dst_fattr)
return -ENOMEM;
+ nfs4_bitmask_set(dst_bitmask, server->cache_consistency_bitmask,
+ dst_inode, NFS_INO_INVALID_BLOCKS);
+
status = nfs4_call_sync(server->client, server, msg,
&args.seq_args, &res.seq_res, 0);
trace_nfs4_clone(src_inode, dst_inode, &args, status);
if (status == 0) {
+ /* a zero-length count means clone to EOF in src */
+ if (count == 0 && res.dst_fattr->valid & NFS_ATTR_FATTR_SIZE)
+ count = nfs_size_to_loff_t(res.dst_fattr->size) - dst_offset;
nfs42_copy_dest_done(dst_inode, dst_offset, count);
status = nfs_post_op_update_inode(dst_inode, res.dst_fattr);
}
@@ -1165,6 +1178,7 @@ static int _nfs42_proc_removexattr(struct inode *inode, const char *name)
ret = nfs4_call_sync(server->client, server, &msg, &args.seq_args,
&res.seq_res, 1);
+ trace_nfs4_removexattr(inode, name, ret);
if (!ret)
nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0);
@@ -1204,6 +1218,7 @@ static int _nfs42_proc_setxattr(struct inode *inode, const char *name,
ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args,
&res.seq_res, 1);
+ trace_nfs4_setxattr(inode, name, ret);
for (; np > 0; np--)
put_page(pages[np - 1]);
@@ -1236,6 +1251,7 @@ static ssize_t _nfs42_proc_getxattr(struct inode *inode, const char *name,
ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args,
&res.seq_res, 0);
+ trace_nfs4_getxattr(inode, name, ret);
if (ret < 0)
return ret;
@@ -1307,6 +1323,7 @@ static ssize_t _nfs42_proc_listxattrs(struct inode *inode, void *buf,
ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args,
&res.seq_res, 0);
+ trace_nfs4_listxattr(inode, ret);
if (ret >= 0) {
ret = res.copied;
diff --git a/fs/nfs/nfs42xattr.c b/fs/nfs/nfs42xattr.c
index 1c4d2a05b401..76ae11834206 100644
--- a/fs/nfs/nfs42xattr.c
+++ b/fs/nfs/nfs42xattr.c
@@ -199,7 +199,7 @@ nfs4_xattr_alloc_entry(const char *name, const void *value,
flags = NFS4_XATTR_ENTRY_EXTVAL;
}
- buf = kmalloc(alloclen, GFP_KERNEL_ACCOUNT | GFP_NOFS);
+ buf = kmalloc(alloclen, GFP_KERNEL);
if (buf == NULL)
return NULL;
entry = (struct nfs4_xattr_entry *)buf;
@@ -213,7 +213,7 @@ nfs4_xattr_alloc_entry(const char *name, const void *value,
if (flags & NFS4_XATTR_ENTRY_EXTVAL) {
- valp = kvmalloc(len, GFP_KERNEL_ACCOUNT | GFP_NOFS);
+ valp = kvmalloc(len, GFP_KERNEL);
if (valp == NULL) {
kfree(buf);
return NULL;
@@ -289,8 +289,7 @@ nfs4_xattr_alloc_cache(void)
{
struct nfs4_xattr_cache *cache;
- cache = kmem_cache_alloc(nfs4_xattr_cache_cachep,
- GFP_KERNEL_ACCOUNT | GFP_NOFS);
+ cache = kmem_cache_alloc(nfs4_xattr_cache_cachep, GFP_KERNEL);
if (cache == NULL)
return NULL;
@@ -982,7 +981,7 @@ nfs4_xattr_entry_count(struct shrinker *shrink, struct shrink_control *sc)
static void nfs4_xattr_cache_init_once(void *p)
{
- struct nfs4_xattr_cache *cache = (struct nfs4_xattr_cache *)p;
+ struct nfs4_xattr_cache *cache = p;
spin_lock_init(&cache->listxattr_lock);
atomic_long_set(&cache->nent, 0);
@@ -998,7 +997,7 @@ int __init nfs4_xattr_cache_init(void)
nfs4_xattr_cache_cachep = kmem_cache_create("nfs4_xattr_cache_cache",
sizeof(struct nfs4_xattr_cache), 0,
- (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+ (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
nfs4_xattr_cache_init_once);
if (nfs4_xattr_cache_cachep == NULL)
return -ENOMEM;
@@ -1018,15 +1017,16 @@ int __init nfs4_xattr_cache_init(void)
if (ret)
goto out2;
- ret = register_shrinker(&nfs4_xattr_cache_shrinker);
+ ret = register_shrinker(&nfs4_xattr_cache_shrinker, "nfs-xattr_cache");
if (ret)
goto out1;
- ret = register_shrinker(&nfs4_xattr_entry_shrinker);
+ ret = register_shrinker(&nfs4_xattr_entry_shrinker, "nfs-xattr_entry");
if (ret)
goto out;
- ret = register_shrinker(&nfs4_xattr_large_entry_shrinker);
+ ret = register_shrinker(&nfs4_xattr_large_entry_shrinker,
+ "nfs-xattr_large_entry");
if (!ret)
return 0;
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 271e5f92ed01..fe1aeb0f048f 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -569,6 +569,14 @@ static int decode_listxattrs(struct xdr_stream *xdr,
*/
if (status == -ETOOSMALL)
status = -ERANGE;
+ /*
+ * Special case: for LISTXATTRS, NFS4ERR_NOXATTR
+ * should be translated to success with zero-length reply.
+ */
+ if (status == -ENODATA) {
+ res->eof = true;
+ status = 0;
+ }
goto out;
}
@@ -1025,82 +1033,95 @@ static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *re
return decode_op_hdr(xdr, OP_DEALLOCATE);
}
-static int decode_read_plus_data(struct xdr_stream *xdr,
- struct nfs_pgio_args *args,
- struct nfs_pgio_res *res)
-{
- uint32_t count, recvd;
+struct read_plus_segment {
+ enum data_content4 type;
uint64_t offset;
- __be32 *p;
-
- p = xdr_inline_decode(xdr, 8 + 4);
- if (!p)
- return 1;
+ union {
+ struct {
+ uint64_t length;
+ } hole;
+
+ struct {
+ uint32_t length;
+ unsigned int from;
+ } data;
+ };
+};
- p = xdr_decode_hyper(p, &offset);
- count = be32_to_cpup(p);
- recvd = xdr_align_data(xdr, res->count, xdr_align_size(count));
- if (recvd > count)
- recvd = count;
- if (res->count + recvd > args->count) {
- if (args->count > res->count)
- res->count += args->count - res->count;
- return 1;
- }
- res->count += recvd;
- if (count > recvd)
- return 1;
- return 0;
+/*
+ * Return the length recorded for a READ_PLUS segment: the data length
+ * for NFS4_CONTENT_DATA segments, the hole length for anything else.
+ */
+static inline uint64_t read_plus_segment_length(struct read_plus_segment *seg)
+{
+	if (seg->type == NFS4_CONTENT_DATA)
+		return seg->data.length;
+	return seg->hole.length;
}
-static int decode_read_plus_hole(struct xdr_stream *xdr,
- struct nfs_pgio_args *args,
- struct nfs_pgio_res *res, uint32_t *eof)
+static int decode_read_plus_segment(struct xdr_stream *xdr,
+ struct read_plus_segment *seg)
{
- uint64_t offset, length, recvd;
__be32 *p;
- p = xdr_inline_decode(xdr, 8 + 8);
+ p = xdr_inline_decode(xdr, 4);
if (!p)
- return 1;
-
- p = xdr_decode_hyper(p, &offset);
- p = xdr_decode_hyper(p, &length);
- if (offset != args->offset + res->count) {
- /* Server returned an out-of-sequence extent */
- if (offset > args->offset + res->count ||
- offset + length < args->offset + res->count) {
- dprintk("NFS: server returned out of sequence extent: "
- "offset/size = %llu/%llu != expected %llu\n",
- (unsigned long long)offset,
- (unsigned long long)length,
- (unsigned long long)(args->offset +
- res->count));
- return 1;
- }
- length -= args->offset + res->count - offset;
- }
- if (length + res->count > args->count) {
- *eof = 0;
- if (unlikely(res->count >= args->count))
- return 1;
- length = args->count - res->count;
- }
- recvd = xdr_expand_hole(xdr, res->count, length);
- res->count += recvd;
+ return -EIO;
+ seg->type = be32_to_cpup(p++);
+
+ p = xdr_inline_decode(xdr, seg->type == NFS4_CONTENT_DATA ? 12 : 16);
+ if (!p)
+ return -EIO;
+ p = xdr_decode_hyper(p, &seg->offset);
- if (recvd < length)
- return 1;
+ if (seg->type == NFS4_CONTENT_DATA) {
+ struct xdr_buf buf;
+ uint32_t len = be32_to_cpup(p);
+
+ seg->data.length = len;
+ seg->data.from = xdr_stream_pos(xdr);
+
+ if (!xdr_stream_subsegment(xdr, &buf, xdr_align_size(len)))
+ return -EIO;
+ } else if (seg->type == NFS4_CONTENT_HOLE) {
+ xdr_decode_hyper(p, &seg->hole.length);
+ } else
+ return -EINVAL;
return 0;
}
+/*
+ * Place one decoded READ_PLUS segment into the reply buffer.
+ *
+ * The segment is clipped to the requested window
+ * [args->offset, args->offset + args->count].  A segment that ends before
+ * the window is ignored.  A segment that starts after the window, or that
+ * had to be clipped at the window's end, clears res->eof.
+ *
+ * Holes are zero-filled with xdr_stream_zero(); data segments are moved
+ * from seg->data.from with xdr_stream_move_subsegment().
+ *
+ * Returns 0 for a segment outside the window, otherwise whatever the xdr
+ * helper returns (the caller adds it to res->count).
+ */
+static int process_read_plus_segment(struct xdr_stream *xdr,
+				     struct nfs_pgio_args *args,
+				     struct nfs_pgio_res *res,
+				     struct read_plus_segment *seg)
+{
+	/*
+	 * The wire values are 64 bits wide; keep them in 64-bit locals so
+	 * they are not truncated where unsigned long is only 32 bits.
+	 */
+	uint64_t offset = seg->offset;
+	uint64_t length = read_plus_segment_length(seg);
+	uint64_t start = args->offset;
+	uint64_t end = start + args->count;
+	unsigned int bufpos;
+
+	if (offset + length < start)
+		return 0;
+	if (offset > end) {
+		res->eof = 0;
+		return 0;
+	}
+
+	/*
+	 * Clip both ends independently: a segment may begin before the
+	 * window and also run past its end.
+	 */
+	if (offset < start) {
+		/*
+		 * NOTE(review): for a DATA segment the source position
+		 * seg->data.from is not advanced by the trimmed amount;
+		 * confirm whether a server can legitimately return data
+		 * starting before the requested offset.
+		 */
+		length -= start - offset;
+		offset = start;
+	}
+	if (offset + length > end) {
+		length = end - offset;
+		res->eof = 0;
+	}
+
+	/* Destination is relative to the end of the head kvec. */
+	bufpos = xdr->buf->head[0].iov_len + (offset - start);
+	if (seg->type == NFS4_CONTENT_HOLE)
+		return xdr_stream_zero(xdr, bufpos, length);
+	return xdr_stream_move_subsegment(xdr, seg->data.from, bufpos, length);
+}
+
+/*
+ * Decode an OP_READ_PLUS reply.
+ *
+ * Reads the eof flag and the segment count, decodes every segment header
+ * into a temporary array, then walks that array from the last segment to
+ * the first, placing each one with process_read_plus_segment() and adding
+ * the result to res->count.
+ *
+ * Returns 0 on success, -ENOMEM if the segment array cannot be allocated,
+ * or the negative error reported while decoding a segment.
+ */
static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res)
{
struct nfs_pgio_header *hdr =
container_of(res, struct nfs_pgio_header, res);
struct nfs_pgio_args *args = &hdr->args;
- uint32_t eof, segments, type;
+ uint32_t segments;
+ struct read_plus_segment *segs;
int status, i;
+ char scratch_buf[16];
__be32 *p;
status = decode_op_hdr(xdr, OP_READ_PLUS);
@@ -1112,38 +1133,31 @@ static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res)
return -EIO;
res->count = 0;
- eof = be32_to_cpup(p++);
+ res->eof = be32_to_cpup(p++);
segments = be32_to_cpup(p++);
if (segments == 0)
- goto out;
-
- for (i = 0; i < segments; i++) {
- p = xdr_inline_decode(xdr, 4);
- if (!p)
- goto early_out;
+ return status;
- type = be32_to_cpup(p++);
- if (type == NFS4_CONTENT_DATA)
- status = decode_read_plus_data(xdr, args, res);
- else if (type == NFS4_CONTENT_HOLE)
- status = decode_read_plus_hole(xdr, args, res, &eof);
- else
- return -EINVAL;
+ segs = kmalloc_array(segments, sizeof(*segs), GFP_KERNEL);
+ if (!segs)
+ return -ENOMEM;
+ /*
+ * The length passed here must match the real size of the on-stack
+ * scratch buffer, otherwise the XDR layer may write past its end.
+ */
+ xdr_set_scratch_buffer(xdr, scratch_buf, sizeof(scratch_buf));
+ status = -EIO;
+ for (i = 0; i < segments; i++) {
+ status = decode_read_plus_segment(xdr, &segs[i]);
if (status < 0)
- return status;
- if (status > 0)
- goto early_out;
+ goto out;
}
+ xdr_set_pagelen(xdr, xdr_align_size(args->count));
+ for (i = segments; i > 0; i--)
+ res->count += process_read_plus_segment(xdr, args, res, &segs[i-1]);
+ status = 0;
+
out:
- res->eof = eof;
- return 0;
-early_out:
- if (unlikely(!i))
- return -EIO;
- res->eof = 0;
- return 0;
+ kfree(segs);
+ return status;
}
static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res)
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index ed5eaca6801e..cfef738d765e 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -42,6 +42,7 @@ enum nfs4_client_state {
NFS4CLNT_LEASE_MOVED,
NFS4CLNT_DELEGATION_EXPIRED,
NFS4CLNT_RUN_MANAGER,
+ NFS4CLNT_MANAGER_AVAILABLE,
NFS4CLNT_RECALL_RUNNING,
NFS4CLNT_RECALL_ANY_LAYOUT_READ,
NFS4CLNT_RECALL_ANY_LAYOUT_RW,
@@ -260,8 +261,8 @@ struct nfs4_state_maintenance_ops {
};
struct nfs4_mig_recovery_ops {
- int (*get_locations)(struct inode *, struct nfs4_fs_locations *,
- struct page *, const struct cred *);
+ int (*get_locations)(struct nfs_server *, struct nfs_fh *,
+ struct nfs4_fs_locations *, struct page *, const struct cred *);
int (*fsid_present)(struct inode *, const struct cred *);
};
@@ -280,7 +281,8 @@ struct rpc_clnt *nfs4_negotiate_security(struct rpc_clnt *, struct inode *,
int nfs4_submount(struct fs_context *, struct nfs_server *);
int nfs4_replace_transport(struct nfs_server *server,
const struct nfs4_fs_locations *locations);
-
+size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr_storage *ss,
+ size_t salen, struct net *net, int port);
/* nfs4proc.c */
extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception *);
extern int nfs4_async_handle_error(struct rpc_task *task,
@@ -302,8 +304,9 @@ extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait);
extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
extern int nfs4_proc_fs_locations(struct rpc_clnt *, struct inode *, const struct qstr *,
struct nfs4_fs_locations *, struct page *);
-extern int nfs4_proc_get_locations(struct inode *, struct nfs4_fs_locations *,
- struct page *page, const struct cred *);
+extern int nfs4_proc_get_locations(struct nfs_server *, struct nfs_fh *,
+ struct nfs4_fs_locations *,
+ struct page *page, const struct cred *);
extern int nfs4_proc_fsid_present(struct inode *, const struct cred *);
extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *,
struct dentry *,
@@ -315,6 +318,8 @@ extern int nfs4_set_rw_stateid(nfs4_stateid *stateid,
const struct nfs_open_context *ctx,
const struct nfs_lock_context *l_ctx,
fmode_t fmode);
+extern void nfs4_bitmask_set(__u32 bitmask[], const __u32 src[],
+ struct inode *inode, unsigned long cache_validity);
extern int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fattr *fattr, struct inode *inode);
extern int update_open_stateid(struct nfs4_state *state,
@@ -454,7 +459,6 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *);
/* nfs4renewd.c */
extern void nfs4_schedule_state_renewal(struct nfs_client *);
-extern void nfs4_renewd_prepare_shutdown(struct nfs_server *);
extern void nfs4_kill_renewd(struct nfs_client *);
extern void nfs4_renew_state(struct work_struct *);
extern void nfs4_set_lease_period(struct nfs_client *clp, unsigned long lease);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index d8b5a250ca05..d3051b051a56 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -254,7 +254,7 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
goto error;
ip_addr = (const char *)buf;
}
- strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr));
+ strscpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr));
err = nfs_idmap_new(clp);
if (err < 0) {
@@ -346,6 +346,7 @@ int nfs40_init_client(struct nfs_client *clp)
ret = nfs4_setup_slot_table(tbl, NFS4_MAX_SLOT_TABLE,
"NFSv4.0 transport Slot table");
if (ret) {
+ nfs4_shutdown_slot_table(tbl);
kfree(tbl);
return ret;
}
@@ -889,7 +890,7 @@ nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr,
*/
static int nfs4_set_client(struct nfs_server *server,
const char *hostname,
- const struct sockaddr *addr,
+ const struct sockaddr_storage *addr,
const size_t addrlen,
const char *ip_addr,
int proto, const struct rpc_timeout *timeparms,
@@ -924,7 +925,7 @@ static int nfs4_set_client(struct nfs_server *server,
__set_bit(NFS_CS_MIGRATION, &cl_init.init_flags);
if (test_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status))
__set_bit(NFS_CS_TSM_POSSIBLE, &cl_init.init_flags);
- server->port = rpc_get_port(addr);
+ server->port = rpc_get_port((struct sockaddr *)addr);
/* Allocate or find a client reference we can use */
clp = nfs_get_client(&cl_init);
@@ -960,7 +961,7 @@ static int nfs4_set_client(struct nfs_server *server,
* the MDS.
*/
struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
- const struct sockaddr *ds_addr, int ds_addrlen,
+ const struct sockaddr_storage *ds_addr, int ds_addrlen,
int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans,
u32 minor_version)
{
@@ -980,7 +981,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
};
char buf[INET6_ADDRSTRLEN + 1];
- if (rpc_ntop(ds_addr, buf, sizeof(buf)) <= 0)
+ if (rpc_ntop((struct sockaddr *)ds_addr, buf, sizeof(buf)) <= 0)
return ERR_PTR(-EINVAL);
cl_init.hostname = buf;
@@ -1148,7 +1149,7 @@ static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc)
/* Get a client record */
error = nfs4_set_client(server,
ctx->nfs_server.hostname,
- &ctx->nfs_server.address,
+ &ctx->nfs_server._address,
ctx->nfs_server.addrlen,
ctx->client_address,
ctx->nfs_server.protocol,
@@ -1161,9 +1162,9 @@ static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc)
return error;
if (ctx->rsize)
- server->rsize = nfs_block_size(ctx->rsize, NULL);
+ server->rsize = nfs_io_size(ctx->rsize, server->nfs_client->cl_proto);
if (ctx->wsize)
- server->wsize = nfs_block_size(ctx->wsize, NULL);
+ server->wsize = nfs_io_size(ctx->wsize, server->nfs_client->cl_proto);
server->acregmin = ctx->acregmin * HZ;
server->acregmax = ctx->acregmax * HZ;
@@ -1238,7 +1239,7 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc)
rpc_set_port(&ctx->nfs_server.address, NFS_RDMA_PORT);
error = nfs4_set_client(server,
ctx->nfs_server.hostname,
- &ctx->nfs_server.address,
+ &ctx->nfs_server._address,
ctx->nfs_server.addrlen,
parent_client->cl_ipaddr,
XPRT_TRANSPORT_RDMA,
@@ -1254,7 +1255,7 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc)
rpc_set_port(&ctx->nfs_server.address, NFS_PORT);
error = nfs4_set_client(server,
ctx->nfs_server.hostname,
- &ctx->nfs_server.address,
+ &ctx->nfs_server._address,
ctx->nfs_server.addrlen,
parent_client->cl_ipaddr,
XPRT_TRANSPORT_TCP,
@@ -1303,14 +1304,14 @@ error:
* Returns zero on success, or a negative errno value.
*/
int nfs4_update_server(struct nfs_server *server, const char *hostname,
- struct sockaddr *sap, size_t salen, struct net *net)
+ struct sockaddr_storage *sap, size_t salen, struct net *net)
{
struct nfs_client *clp = server->nfs_client;
struct rpc_clnt *clnt = server->client;
struct xprt_create xargs = {
.ident = clp->cl_proto,
.net = net,
- .dstaddr = sap,
+ .dstaddr = (struct sockaddr *)sap,
.addrlen = salen,
.servername = hostname,
};
@@ -1343,8 +1344,11 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
}
nfs_put_client(clp);
- if (server->nfs_client->cl_hostname == NULL)
+ if (server->nfs_client->cl_hostname == NULL) {
server->nfs_client->cl_hostname = kstrdup(hostname, GFP_KERNEL);
+ if (server->nfs_client->cl_hostname == NULL)
+ return -ENOMEM;
+ }
nfs_server_insert_lists(server);
return nfs_probe_server(server, NFS_FH(d_inode(server->super->s_root)));
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index e79ae4cbc395..9eb181287879 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -32,6 +32,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)
struct dentry *parent = NULL;
struct inode *dir;
unsigned openflags = filp->f_flags;
+ fmode_t f_mode;
struct iattr attr;
int err;
@@ -50,8 +51,9 @@ nfs4_file_open(struct inode *inode, struct file *filp)
if (err)
return err;
+ f_mode = filp->f_mode;
if ((openflags & O_ACCMODE) == 3)
- return nfs_open(inode, filp);
+ f_mode |= flags_to_mode(openflags);
/* We can't create new files here */
openflags &= ~(O_CREAT|O_EXCL);
@@ -59,7 +61,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)
parent = dget_parent(dentry);
dir = d_inode(parent);
- ctx = alloc_nfs_open_context(file_dentry(filp), filp->f_mode, filp);
+ ctx = alloc_nfs_open_context(file_dentry(filp), f_mode, filp);
err = PTR_ERR(ctx);
if (IS_ERR(ctx))
goto out;
@@ -91,6 +93,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)
nfs_file_set_open_context(filp, ctx);
nfs_fscache_open_file(inode, filp);
err = 0;
+ filp->f_mode |= FMODE_CAN_ODIRECT;
out_put_ctx:
put_nfs_open_context(ctx);
@@ -165,7 +168,7 @@ retry:
if (sync)
return -EOPNOTSUPP;
cn_resp = kzalloc(sizeof(struct nfs42_copy_notify_res),
- GFP_NOFS);
+ GFP_KERNEL);
if (unlikely(cn_resp == NULL))
return -ENOMEM;
@@ -180,8 +183,8 @@ retry:
ret = nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count,
nss, cnrs, sync);
out:
- if (!nfs42_files_from_same_server(file_in, file_out))
- kfree(cn_resp);
+ kfree(cn_resp);
+
if (ret == -EAGAIN)
goto retry;
return ret;
@@ -326,7 +329,7 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt,
char *read_name = NULL;
int len, status = 0;
- server = NFS_SERVER(ss_mnt->mnt_root->d_inode);
+ server = NFS_SB(ss_mnt->mnt_sb);
if (!fattr)
return ERR_PTR(-ENOMEM);
@@ -337,14 +340,19 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt,
goto out;
}
+ if (!S_ISREG(fattr->mode)) {
+ res = ERR_PTR(-EBADF);
+ goto out;
+ }
+
res = ERR_PTR(-ENOMEM);
len = strlen(SSC_READ_NAME_BODY) + 16;
- read_name = kzalloc(len, GFP_NOFS);
+ read_name = kzalloc(len, GFP_KERNEL);
if (read_name == NULL)
goto out;
snprintf(read_name, len, SSC_READ_NAME_BODY, read_name_gen++);
- r_ino = nfs_fhget(ss_mnt->mnt_root->d_inode->i_sb, src_fh, fattr);
+ r_ino = nfs_fhget(ss_mnt->mnt_sb, src_fh, fattr);
if (IS_ERR(r_ino)) {
res = ERR_CAST(r_ino);
goto out_free_name;
@@ -354,6 +362,7 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt,
r_ino->i_fop);
if (IS_ERR(filep)) {
res = ERR_CAST(filep);
+ iput(r_ino);
goto out_free_name;
}
diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
index f331866dd418..e3fdd2f45b01 100644
--- a/fs/nfs/nfs4idmap.c
+++ b/fs/nfs/nfs4idmap.c
@@ -561,22 +561,20 @@ nfs_idmap_prepare_pipe_upcall(struct idmap *idmap,
return true;
}
-static void
-nfs_idmap_complete_pipe_upcall_locked(struct idmap *idmap, int ret)
+static void nfs_idmap_complete_pipe_upcall(struct idmap_legacy_upcalldata *data,
+ int ret)
{
- struct key *authkey = idmap->idmap_upcall_data->authkey;
-
- kfree(idmap->idmap_upcall_data);
- idmap->idmap_upcall_data = NULL;
- complete_request_key(authkey, ret);
- key_put(authkey);
+ complete_request_key(data->authkey, ret);
+ key_put(data->authkey);
+ kfree(data);
}
-static void
-nfs_idmap_abort_pipe_upcall(struct idmap *idmap, int ret)
+static void nfs_idmap_abort_pipe_upcall(struct idmap *idmap,
+ struct idmap_legacy_upcalldata *data,
+ int ret)
{
- if (idmap->idmap_upcall_data != NULL)
- nfs_idmap_complete_pipe_upcall_locked(idmap, ret);
+ if (cmpxchg(&idmap->idmap_upcall_data, data, NULL) == data)
+ nfs_idmap_complete_pipe_upcall(data, ret);
}
static int nfs_idmap_legacy_upcall(struct key *authkey, void *aux)
@@ -585,7 +583,7 @@ static int nfs_idmap_legacy_upcall(struct key *authkey, void *aux)
struct request_key_auth *rka = get_request_key_auth(authkey);
struct rpc_pipe_msg *msg;
struct idmap_msg *im;
- struct idmap *idmap = (struct idmap *)aux;
+ struct idmap *idmap = aux;
struct key *key = rka->target_key;
int ret = -ENOKEY;
@@ -613,7 +611,7 @@ static int nfs_idmap_legacy_upcall(struct key *authkey, void *aux)
ret = rpc_queue_upcall(idmap->idmap_pipe, msg);
if (ret < 0)
- nfs_idmap_abort_pipe_upcall(idmap, ret);
+ nfs_idmap_abort_pipe_upcall(idmap, data, ret);
return ret;
out2:
@@ -669,6 +667,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
struct request_key_auth *rka;
struct rpc_inode *rpci = RPC_I(file_inode(filp));
struct idmap *idmap = (struct idmap *)rpci->private;
+ struct idmap_legacy_upcalldata *data;
struct key *authkey;
struct idmap_msg im;
size_t namelen_in;
@@ -678,10 +677,11 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
* will have been woken up and someone else may now have used
* idmap_key_cons - so after this point we may no longer touch it.
*/
- if (idmap->idmap_upcall_data == NULL)
+ data = xchg(&idmap->idmap_upcall_data, NULL);
+ if (data == NULL)
goto out_noupcall;
- authkey = idmap->idmap_upcall_data->authkey;
+ authkey = data->authkey;
rka = get_request_key_auth(authkey);
if (mlen != sizeof(im)) {
@@ -703,18 +703,17 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ) {
ret = -EINVAL;
goto out;
-}
+ }
- ret = nfs_idmap_read_and_verify_message(&im,
- &idmap->idmap_upcall_data->idmap_msg,
- rka->target_key, authkey);
+ ret = nfs_idmap_read_and_verify_message(&im, &data->idmap_msg,
+ rka->target_key, authkey);
if (ret >= 0) {
key_set_timeout(rka->target_key, nfs_idmap_cache_timeout);
ret = mlen;
}
out:
- nfs_idmap_complete_pipe_upcall_locked(idmap, ret);
+ nfs_idmap_complete_pipe_upcall(data, ret);
out_noupcall:
return ret;
}
@@ -728,7 +727,7 @@ idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg)
struct idmap *idmap = data->idmap;
if (msg->errno)
- nfs_idmap_abort_pipe_upcall(idmap, msg->errno);
+ nfs_idmap_abort_pipe_upcall(idmap, data, msg->errno);
}
static void
@@ -736,8 +735,11 @@ idmap_release_pipe(struct inode *inode)
{
struct rpc_inode *rpci = RPC_I(inode);
struct idmap *idmap = (struct idmap *)rpci->private;
+ struct idmap_legacy_upcalldata *data;
- nfs_idmap_abort_pipe_upcall(idmap, -EPIPE);
+ data = xchg(&idmap->idmap_upcall_data, NULL);
+ if (data)
+ nfs_idmap_complete_pipe_upcall(data, -EPIPE);
}
int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, kuid_t *uid)
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 873342308dc0..9a98595bb160 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -164,16 +164,22 @@ static int nfs4_validate_fspath(struct dentry *dentry,
return 0;
}
-static size_t nfs_parse_server_name(char *string, size_t len,
- struct sockaddr *sa, size_t salen, struct net *net)
+size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr_storage *ss,
+ size_t salen, struct net *net, int port)
{
+ struct sockaddr *sa = (struct sockaddr *)ss;
ssize_t ret;
ret = rpc_pton(net, string, len, sa, salen);
if (ret == 0) {
- ret = nfs_dns_resolve_name(net, string, len, sa, salen);
- if (ret < 0)
- ret = 0;
+ ret = rpc_uaddr2sockaddr(net, string, len, sa, salen);
+ if (ret == 0) {
+ ret = nfs_dns_resolve_name(net, string, len, ss, salen);
+ if (ret < 0)
+ ret = 0;
+ }
+ } else if (port) {
+ rpc_set_port(sa, port);
}
return ret;
}
@@ -326,9 +332,9 @@ static int try_location(struct fs_context *fc,
ctx->nfs_server.addrlen =
nfs_parse_server_name(buf->data, buf->len,
- &ctx->nfs_server.address,
+ &ctx->nfs_server._address,
sizeof(ctx->nfs_server._address),
- fc->net_ns);
+ fc->net_ns, 0);
if (ctx->nfs_server.addrlen == 0)
continue;
@@ -412,6 +418,9 @@ static int nfs_do_refmount(struct fs_context *fc, struct rpc_clnt *client)
fs_locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL);
if (!fs_locations)
goto out_free;
+ fs_locations->fattr = nfs_alloc_fattr();
+ if (!fs_locations->fattr)
+ goto out_free_2;
/* Get locations */
dentry = ctx->clone_data.dentry;
@@ -422,14 +431,16 @@ static int nfs_do_refmount(struct fs_context *fc, struct rpc_clnt *client)
err = nfs4_proc_fs_locations(client, d_inode(parent), &dentry->d_name, fs_locations, page);
dput(parent);
if (err != 0)
- goto out_free_2;
+ goto out_free_3;
err = -ENOENT;
if (fs_locations->nlocations <= 0 ||
fs_locations->fs_path.ncomponents <= 0)
- goto out_free_2;
+ goto out_free_3;
err = nfs_follow_referral(fc, fs_locations);
+out_free_3:
+ kfree(fs_locations->fattr);
out_free_2:
kfree(fs_locations);
out_free:
@@ -473,14 +484,13 @@ static int nfs4_try_replacing_one_location(struct nfs_server *server,
char *page, char *page2,
const struct nfs4_fs_location *location)
{
- const size_t addr_bufsize = sizeof(struct sockaddr_storage);
struct net *net = rpc_net_ns(server->client);
- struct sockaddr *sap;
+ struct sockaddr_storage *sap;
unsigned int s;
size_t salen;
int error;
- sap = kmalloc(addr_bufsize, GFP_KERNEL);
+ sap = kmalloc(sizeof(*sap), GFP_KERNEL);
if (sap == NULL)
return -ENOMEM;
@@ -496,10 +506,10 @@ static int nfs4_try_replacing_one_location(struct nfs_server *server,
continue;
salen = nfs_parse_server_name(buf->data, buf->len,
- sap, addr_bufsize, net);
+ sap, sizeof(*sap), net, 0);
if (salen == 0)
continue;
- rpc_set_port(sap, NFS_PORT);
+ rpc_set_port((struct sockaddr *)sap, NFS_PORT);
error = -ENOMEM;
hostname = kmemdup_nul(buf->data, buf->len, GFP_KERNEL);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index ee3bc79f6ca3..86ed5c0142c3 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -108,10 +108,6 @@ static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *,
static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *,
const struct cred *, bool);
#endif
-static void nfs4_bitmask_set(__u32 bitmask[NFS4_BITMASK_SZ],
- const __u32 *src, struct inode *inode,
- struct nfs_server *server,
- struct nfs4_label *label);
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
static inline struct nfs4_label *
@@ -367,6 +363,14 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
kunmap_atomic(start);
}
+/*
+ * Store @version as the pre-change attribute of @fattr and mark it valid,
+ * unless a pre-change value is already flagged as valid.
+ */
+static void nfs4_fattr_set_prechange(struct nfs_fattr *fattr, u64 version)
+{
+	if (fattr->valid & NFS_ATTR_FATTR_PRECHANGE)
+		return;
+
+	fattr->pre_change_attr = version;
+	fattr->valid |= NFS_ATTR_FATTR_PRECHANGE;
+}
+
static void nfs4_test_and_free_stateid(struct nfs_server *server,
nfs4_stateid *stateid,
const struct cred *cred)
@@ -412,8 +416,8 @@ static int nfs4_delay_killable(long *timeout)
{
might_sleep();
- freezable_schedule_timeout_killable_unsafe(
- nfs4_update_delay(timeout));
+ __set_current_state(TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
+ schedule_timeout(nfs4_update_delay(timeout));
if (!__fatal_signal_pending(current))
return 0;
return -EINTR;
@@ -423,7 +427,8 @@ static int nfs4_delay_interruptible(long *timeout)
{
might_sleep();
- freezable_schedule_timeout_interruptible_unsafe(nfs4_update_delay(timeout));
+ __set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE_UNSAFE);
+ schedule_timeout(nfs4_update_delay(timeout));
if (!signal_pending(current))
return 0;
return __fatal_signal_pending(current) ? -EINTR :-ERESTARTSYS;
@@ -780,10 +785,9 @@ static void nfs4_slot_sequence_record_sent(struct nfs4_slot *slot,
if ((s32)(seqnr - slot->seq_nr_highest_sent) > 0)
slot->seq_nr_highest_sent = seqnr;
}
-static void nfs4_slot_sequence_acked(struct nfs4_slot *slot,
- u32 seqnr)
+static void nfs4_slot_sequence_acked(struct nfs4_slot *slot, u32 seqnr)
{
- slot->seq_nr_highest_sent = seqnr;
+ nfs4_slot_sequence_record_sent(slot, seqnr);
slot->seq_nr_last_acked = seqnr;
}
@@ -850,7 +854,6 @@ static int nfs41_sequence_process(struct rpc_task *task,
__func__,
slot->slot_nr,
slot->seq_nr);
- nfs4_slot_sequence_acked(slot, slot->seq_nr);
goto out_retry;
case -NFS4ERR_RETRY_UNCACHED_REP:
case -NFS4ERR_SEQ_FALSE_RETRY:
@@ -1158,7 +1161,7 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
{
unsigned short task_flags = 0;
- if (server->nfs_client->cl_minorversion)
+ if (server->caps & NFS_CAP_MOVEABLE)
task_flags = RPC_TASK_MOVEABLE;
return nfs4_do_call_sync(clnt, server, msg, args, res, task_flags);
}
@@ -1233,8 +1236,7 @@ nfs4_update_changeattr_locked(struct inode *inode,
NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL |
NFS_INO_INVALID_SIZE | NFS_INO_INVALID_OTHER |
NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_NLINK |
- NFS_INO_INVALID_MODE | NFS_INO_INVALID_XATTR |
- NFS_INO_REVAL_PAGECACHE;
+ NFS_INO_INVALID_MODE | NFS_INO_INVALID_XATTR;
nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
}
nfsi->attrtimeo_timestamp = jiffies;
@@ -1397,13 +1399,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
case NFS4_OPEN_CLAIM_FH:
p->o_arg.access = NFS4_ACCESS_READ | NFS4_ACCESS_MODIFY |
NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE |
- NFS4_ACCESS_EXECUTE;
-#ifdef CONFIG_NFS_V4_2
- if (!(server->caps & NFS_CAP_XATTR))
- break;
- p->o_arg.access |= NFS4_ACCESS_XAREAD | NFS4_ACCESS_XAWRITE |
- NFS4_ACCESS_XALIST;
-#endif
+ NFS4_ACCESS_EXECUTE |
+ nfs_access_xattr_mask(server);
}
p->o_arg.clientid = server->nfs_client->cl_clientid;
p->o_arg.id.create_time = ktime_to_ns(sp->so_seqid.create_time);
@@ -2570,7 +2567,7 @@ static int nfs4_run_open_task(struct nfs4_opendata *data,
};
int status;
- if (server->nfs_client->cl_minorversion)
+ if (nfs_server_capable(dir, NFS_CAP_MOVEABLE))
task_setup_data.flags |= RPC_TASK_MOVEABLE;
kref_get(&data->kref);
@@ -2653,9 +2650,8 @@ static int nfs4_opendata_access(const struct cred *cred,
} else if ((fmode & FMODE_READ) && !opendata->file_created)
mask = NFS4_ACCESS_READ;
- cache.cred = cred;
nfs_access_set_mask(&cache, opendata->o_res.access_result);
- nfs_access_add_cache(state->inode, &cache);
+ nfs_access_add_cache(state->inode, &cache, cred);
flags = NFS4_ACCESS_READ | NFS4_ACCESS_EXECUTE | NFS4_ACCESS_LOOKUP;
if ((mask & ~cache.mask & flags) == 0)
@@ -3056,6 +3052,8 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
if (opendata->o_res.rflags & NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK)
set_bit(NFS_STATE_MAY_NOTIFY_LOCK, &state->flags);
+ if (opendata->o_res.rflags & NFS4_OPEN_RESULT_PRESERVE_UNLINKED)
+ set_bit(NFS_INO_PRESERVE_UNLINKED, &NFS_I(state->inode)->flags);
dentry = opendata->dentry;
if (d_really_is_negative(dentry)) {
@@ -3099,8 +3097,13 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
}
out:
- if (!opendata->cancelled)
+ if (!opendata->cancelled) {
+ if (opendata->lgp) {
+ nfs4_lgopen_release(opendata->lgp);
+ opendata->lgp = NULL;
+ }
nfs4_sequence_free_slot(&opendata->o_res.seq_res);
+ }
return ret;
}
@@ -3670,7 +3673,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
if (!nfs4_have_delegation(inode, FMODE_READ)) {
nfs4_bitmask_set(calldata->arg.bitmask_store,
server->cache_consistency_bitmask,
- inode, server, NULL);
+ inode, 0);
calldata->arg.bitmask = calldata->arg.bitmask_store;
} else
calldata->arg.bitmask = NULL;
@@ -3734,7 +3737,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait)
};
int status = -ENOMEM;
- if (server->nfs_client->cl_minorversion)
+ if (nfs_server_capable(state->inode, NFS_CAP_MOVEABLE))
task_setup_data.flags |= RPC_TASK_MOVEABLE;
nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_CLEANUP,
@@ -3841,7 +3844,9 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
FATTR4_WORD0_FH_EXPIRE_TYPE |
FATTR4_WORD0_LINK_SUPPORT |
FATTR4_WORD0_SYMLINK_SUPPORT |
- FATTR4_WORD0_ACLSUPPORT;
+ FATTR4_WORD0_ACLSUPPORT |
+ FATTR4_WORD0_CASE_INSENSITIVE |
+ FATTR4_WORD0_CASE_PRESERVING;
if (minorversion)
bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT;
@@ -3870,10 +3875,16 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
server->caps |= NFS_CAP_HARDLINKS;
if (res.has_symlinks != 0)
server->caps |= NFS_CAP_SYMLINKS;
+ if (res.case_insensitive)
+ server->caps |= NFS_CAP_CASE_INSENSITIVE;
+ if (res.case_preserving)
+ server->caps |= NFS_CAP_CASE_PRESERVING;
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
if (res.attr_bitmask[2] & FATTR4_WORD2_SECURITY_LABEL)
server->caps |= NFS_CAP_SECURITY_LABEL;
#endif
+ if (res.attr_bitmask[0] & FATTR4_WORD0_FS_LOCATIONS)
+ server->caps |= NFS_CAP_FS_LOCATIONS;
if (!(res.attr_bitmask[0] & FATTR4_WORD0_FILEID))
server->fattr_valid &= ~NFS_ATTR_FATTR_FILEID;
if (!(res.attr_bitmask[1] & FATTR4_WORD1_MODE))
@@ -3932,6 +3943,121 @@ int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
return err;
}
+/*
+ * Try every server listed in one fs_locations entry as an additional
+ * transport for @clp.
+ *
+ * Each non-empty server string is parsed into a socket address (using
+ * server->port as the port argument) and handed to rpc_clnt_add_xprt()
+ * with the client's session_trunk test as the acceptance check.
+ *
+ * Processing stops at the first server whose name cannot be parsed or
+ * whose name copy cannot be allocated.
+ */
+static void test_fs_location_for_trunking(struct nfs4_fs_location *location,
+					  struct nfs_client *clp,
+					  struct nfs_server *server)
+{
+	int i;
+
+	for (i = 0; i < location->nservers; i++) {
+		struct nfs4_string *srv_loc = &location->servers[i];
+		struct sockaddr_storage addr;
+		size_t addrlen;
+		struct xprt_create xprt_args = {
+			.ident = 0,
+			.net = clp->cl_net,
+		};
+		struct nfs4_add_xprt_data xprtdata = {
+			.clp = clp,
+		};
+		struct rpc_add_xprt_test rpcdata = {
+			.add_xprt_test = clp->cl_mvops->session_trunk,
+			.data = &xprtdata,
+		};
+		char *servername = NULL;
+
+		if (!srv_loc->len)
+			continue;
+
+		addrlen = nfs_parse_server_name(srv_loc->data, srv_loc->len,
+						&addr, sizeof(addr),
+						clp->cl_net, server->port);
+		if (!addrlen)
+			return;
+		xprt_args.dstaddr = (struct sockaddr *)&addr;
+		xprt_args.addrlen = addrlen;
+
+		/* The server string is length-counted; make a NUL-terminated copy. */
+		servername = kmemdup_nul(srv_loc->data, srv_loc->len,
+					 GFP_KERNEL);
+		if (!servername)
+			return;
+		xprt_args.servername = servername;
+
+		xprtdata.cred = nfs4_get_clid_cred(clp);
+		rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
+				  rpc_clnt_setup_test_and_add_xprt,
+				  &rpcdata);
+		if (xprtdata.cred)
+			put_cred(xprtdata.cred);
+		kfree(servername);
+	}
+}
+
+/*
+ * Fetch the fs_locations for @fhandle and test every returned location
+ * for trunking with test_fs_location_for_trunking().
+ *
+ * Uses the state-renewal credential, falling back to the client-ID
+ * credential.  Returns -ENOKEY if neither is available, -ENOMEM on
+ * allocation failure, otherwise the status of nfs4_proc_get_locations().
+ */
+static int _nfs4_discover_trunking(struct nfs_server *server,
+				   struct nfs_fh *fhandle)
+{
+	struct nfs4_fs_locations *locations = NULL;
+	struct page *page;
+	const struct cred *cred;
+	struct nfs_client *clp = server->nfs_client;
+	const struct nfs4_state_maintenance_ops *ops =
+		clp->cl_mvops->state_renewal_ops;
+	int status = -ENOMEM, i;
+
+	cred = ops->get_state_renewal_cred(clp);
+	if (cred == NULL) {
+		cred = nfs4_get_clid_cred(clp);
+		if (cred == NULL)
+			return -ENOKEY;
+	}
+
+	/* From here on every exit must drop the credential reference. */
+	page = alloc_page(GFP_KERNEL);
+	if (!page)
+		goto out_put_cred;
+	locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL);
+	if (!locations)
+		goto out_free;
+	locations->fattr = nfs_alloc_fattr();
+	if (!locations->fattr)
+		goto out_free_2;
+
+	status = nfs4_proc_get_locations(server, fhandle, locations, page,
+					 cred);
+	if (status)
+		goto out_free_3;
+
+	for (i = 0; i < locations->nlocations; i++)
+		test_fs_location_for_trunking(&locations->locations[i], clp,
+					      server);
+out_free_3:
+	kfree(locations->fattr);
+out_free_2:
+	kfree(locations);
+out_free:
+	__free_page(page);
+out_put_cred:
+	put_cred(cred);
+	return status;
+}
+
+/*
+ * Run trunking discovery for @server, retrying for as long as
+ * nfs4_handle_exception() asks for a retry.
+ *
+ * Does nothing and returns 0 when the client has no session.
+ */
+static int nfs4_discover_trunking(struct nfs_server *server,
+				  struct nfs_fh *fhandle)
+{
+	struct nfs4_exception exception = {
+		.interruptible = true,
+	};
+	int err;
+
+	if (!nfs4_has_session(server->nfs_client))
+		return 0;
+
+	do {
+		int status = _nfs4_discover_trunking(server, fhandle);
+
+		err = nfs4_handle_exception(server, status, &exception);
+	} while (exception.retry);
+
+	return err;
+}
+
static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fsinfo *info)
{
@@ -4128,6 +4254,8 @@ static int nfs4_get_referral(struct rpc_clnt *client, struct inode *dir,
if (locations == NULL)
goto out;
+ locations->fattr = fattr;
+
status = nfs4_proc_fs_locations(client, dir, name, locations, page);
if (status != 0)
goto out;
@@ -4137,17 +4265,14 @@ static int nfs4_get_referral(struct rpc_clnt *client, struct inode *dir,
* referral. Cause us to drop into the exception handler, which
* will kick off migration recovery.
*/
- if (nfs_fsid_equal(&NFS_SERVER(dir)->fsid, &locations->fattr.fsid)) {
+ if (nfs_fsid_equal(&NFS_SERVER(dir)->fsid, &fattr->fsid)) {
dprintk("%s: server did not return a different fsid for"
" a referral at %s\n", __func__, name->name);
status = -NFS4ERR_MOVED;
goto out;
}
/* Fixup attributes for the nfs_lookup() call to nfs_fhget() */
- nfs_fixup_referral_attributes(&locations->fattr);
-
- /* replace the lookup nfs_fattr with the locations nfs_fattr */
- memcpy(fattr, &locations->fattr, sizeof(struct nfs_fattr));
+ nfs_fixup_referral_attributes(fattr);
memset(fhandle, 0, sizeof(struct nfs_fh));
out:
if (page)
@@ -4289,7 +4414,7 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir,
};
unsigned short task_flags = 0;
- if (server->nfs_client->cl_minorversion)
+ if (nfs_server_capable(dir, NFS_CAP_MOVEABLE))
task_flags = RPC_TASK_MOVEABLE;
/* Is this is an attribute revalidation, subject to softreval? */
@@ -4441,7 +4566,8 @@ static int nfs4_proc_lookupp(struct inode *inode, struct nfs_fh *fhandle,
return err;
}
-static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
+static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry,
+ const struct cred *cred)
{
struct nfs_server *server = NFS_SERVER(inode);
struct nfs4_accessargs args = {
@@ -4455,7 +4581,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ACCESS],
.rpc_argp = &args,
.rpc_resp = &res,
- .rpc_cred = entry->cred,
+ .rpc_cred = cred,
};
int status = 0;
@@ -4475,14 +4601,15 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
return status;
}
-static int nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
+static int nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry,
+ const struct cred *cred)
{
struct nfs4_exception exception = {
.interruptible = true,
};
int err;
do {
- err = _nfs4_proc_access(inode, entry);
+ err = _nfs4_proc_access(inode, entry, cred);
trace_nfs4_access(inode, err);
err = nfs4_handle_exception(NFS_SERVER(inode), err,
&exception);
@@ -4663,8 +4790,10 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg,
nfs_fattr_init(res->dir_attr);
- if (inode)
+ if (inode) {
nfs4_inode_return_delegation(inode);
+ nfs_d_prune_case_insensitive_aliases(inode);
+ }
}
static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data)
@@ -4730,6 +4859,7 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
return 0;
if (task->tk_status == 0) {
+ nfs_d_prune_case_insensitive_aliases(d_inode(data->old_dentry));
if (new_dir != old_dir) {
/* Note: If we moved a directory, nlink will change */
nfs4_update_changeattr(old_dir, &res->old_cinfo,
@@ -5422,14 +5552,14 @@ bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr)
return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0;
}
-static void nfs4_bitmask_set(__u32 bitmask[NFS4_BITMASK_SZ], const __u32 *src,
- struct inode *inode, struct nfs_server *server,
- struct nfs4_label *label)
+void nfs4_bitmask_set(__u32 bitmask[], const __u32 src[],
+ struct inode *inode, unsigned long cache_validity)
{
- unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
+ struct nfs_server *server = NFS_SERVER(inode);
unsigned int i;
memcpy(bitmask, src, sizeof(*bitmask) * NFS4_BITMASK_SZ);
+ cache_validity |= READ_ONCE(NFS_I(inode)->cache_validity);
if (cache_validity & NFS_INO_INVALID_CHANGE)
bitmask[0] |= FATTR4_WORD0_CHANGE;
@@ -5441,8 +5571,6 @@ static void nfs4_bitmask_set(__u32 bitmask[NFS4_BITMASK_SZ], const __u32 *src,
bitmask[1] |= FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP;
if (cache_validity & NFS_INO_INVALID_NLINK)
bitmask[1] |= FATTR4_WORD1_NUMLINKS;
- if (label && label->len && cache_validity & NFS_INO_INVALID_LABEL)
- bitmask[2] |= FATTR4_WORD2_SECURITY_LABEL;
if (cache_validity & NFS_INO_INVALID_CTIME)
bitmask[1] |= FATTR4_WORD1_TIME_METADATA;
if (cache_validity & NFS_INO_INVALID_MTIME)
@@ -5469,7 +5597,7 @@ static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr,
} else {
nfs4_bitmask_set(hdr->args.bitmask_store,
server->cache_consistency_bitmask,
- hdr->inode, server, NULL);
+ hdr->inode, NFS_INO_INVALID_BLOCKS);
hdr->args.bitmask = hdr->args.bitmask_store;
}
@@ -5650,9 +5778,17 @@ static int nfs4_proc_renew(struct nfs_client *clp, const struct cred *cred)
return 0;
}
-static inline int nfs4_server_supports_acls(struct nfs_server *server)
+static bool nfs4_server_supports_acls(const struct nfs_server *server,
+ enum nfs4_acl_type type)
{
- return server->caps & NFS_CAP_ACLS;
+ switch (type) {
+ default:
+ return server->attr_bitmask[0] & FATTR4_WORD0_ACL;
+ case NFS4ACL_DACL:
+ return server->attr_bitmask[1] & FATTR4_WORD1_DACL;
+ case NFS4ACL_SACL:
+ return server->attr_bitmask[1] & FATTR4_WORD1_SACL;
+ }
}
/* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_SIZE, and that
@@ -5691,6 +5827,7 @@ unwind:
}
struct nfs4_cached_acl {
+ enum nfs4_acl_type type;
int cached;
size_t len;
char data[];
@@ -5711,7 +5848,8 @@ static void nfs4_zap_acl_attr(struct inode *inode)
nfs4_set_cached_acl(inode, NULL);
}
-static inline ssize_t nfs4_read_cached_acl(struct inode *inode, char *buf, size_t buflen)
+static ssize_t nfs4_read_cached_acl(struct inode *inode, char *buf,
+ size_t buflen, enum nfs4_acl_type type)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs4_cached_acl *acl;
@@ -5721,6 +5859,8 @@ static inline ssize_t nfs4_read_cached_acl(struct inode *inode, char *buf, size_
acl = nfsi->nfs4_acl;
if (acl == NULL)
goto out;
+ if (acl->type != type)
+ goto out;
if (buf == NULL) /* user is just asking for length */
goto out_len;
if (acl->cached == 0)
@@ -5736,7 +5876,9 @@ out:
return ret;
}
-static void nfs4_write_cached_acl(struct inode *inode, struct page **pages, size_t pgbase, size_t acl_len)
+static void nfs4_write_cached_acl(struct inode *inode, struct page **pages,
+ size_t pgbase, size_t acl_len,
+ enum nfs4_acl_type type)
{
struct nfs4_cached_acl *acl;
size_t buflen = sizeof(*acl) + acl_len;
@@ -5753,6 +5895,7 @@ static void nfs4_write_cached_acl(struct inode *inode, struct page **pages, size
goto out;
acl->cached = 0;
}
+ acl->type = type;
acl->len = acl_len;
out:
nfs4_set_cached_acl(inode, acl);
@@ -5768,14 +5911,17 @@ out:
* length. The next getxattr call will then produce another round trip to
* the server, this time with the input buf of the required size.
*/
-static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
+static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf,
+ size_t buflen, enum nfs4_acl_type type)
{
struct page **pages;
struct nfs_getaclargs args = {
.fh = NFS_FH(inode),
+ .acl_type = type,
.acl_len = buflen,
};
struct nfs_getaclres res = {
+ .acl_type = type,
.acl_len = buflen,
};
struct rpc_message msg = {
@@ -5791,7 +5937,7 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
buflen = server->rsize;
npages = DIV_ROUND_UP(buflen, PAGE_SIZE) + 1;
- pages = kmalloc_array(npages, sizeof(struct page *), GFP_NOFS);
+ pages = kmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
if (!pages)
return -ENOMEM;
@@ -5825,7 +5971,8 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
ret = -ERANGE;
goto out_free;
}
- nfs4_write_cached_acl(inode, pages, res.acl_data_offset, res.acl_len);
+ nfs4_write_cached_acl(inode, pages, res.acl_data_offset, res.acl_len,
+ type);
if (buf) {
if (res.acl_len > buflen) {
ret = -ERANGE;
@@ -5845,14 +5992,15 @@ out_free:
return ret;
}
-static ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
+static ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf,
+ size_t buflen, enum nfs4_acl_type type)
{
struct nfs4_exception exception = {
.interruptible = true,
};
ssize_t ret;
do {
- ret = __nfs4_get_acl_uncached(inode, buf, buflen);
+ ret = __nfs4_get_acl_uncached(inode, buf, buflen, type);
trace_nfs4_get_acl(inode, ret);
if (ret >= 0)
break;
@@ -5861,34 +6009,37 @@ static ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bufl
return ret;
}
-static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
+static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen,
+ enum nfs4_acl_type type)
{
struct nfs_server *server = NFS_SERVER(inode);
int ret;
- if (!nfs4_server_supports_acls(server))
+ if (!nfs4_server_supports_acls(server, type))
return -EOPNOTSUPP;
ret = nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE);
if (ret < 0)
return ret;
if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL)
nfs_zap_acl_cache(inode);
- ret = nfs4_read_cached_acl(inode, buf, buflen);
+ ret = nfs4_read_cached_acl(inode, buf, buflen, type);
if (ret != -ENOENT)
/* -ENOENT is returned if there is no ACL or if there is an ACL
* but no cached acl data, just the acl length */
return ret;
- return nfs4_get_acl_uncached(inode, buf, buflen);
+ return nfs4_get_acl_uncached(inode, buf, buflen, type);
}
-static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen)
+static int __nfs4_proc_set_acl(struct inode *inode, const void *buf,
+ size_t buflen, enum nfs4_acl_type type)
{
struct nfs_server *server = NFS_SERVER(inode);
struct page *pages[NFS4ACL_MAXPAGES];
struct nfs_setaclargs arg = {
- .fh = NFS_FH(inode),
- .acl_pages = pages,
- .acl_len = buflen,
+ .fh = NFS_FH(inode),
+ .acl_type = type,
+ .acl_len = buflen,
+ .acl_pages = pages,
};
struct nfs_setaclres res;
struct rpc_message msg = {
@@ -5902,7 +6053,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
/* You can't remove system.nfs4_acl: */
if (buflen == 0)
return -EINVAL;
- if (!nfs4_server_supports_acls(server))
+ if (!nfs4_server_supports_acls(server, type))
return -EOPNOTSUPP;
if (npages > ARRAY_SIZE(pages))
return -ERANGE;
@@ -5933,12 +6084,13 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
return ret;
}
-static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen)
+static int nfs4_proc_set_acl(struct inode *inode, const void *buf,
+ size_t buflen, enum nfs4_acl_type type)
{
struct nfs4_exception exception = { };
int err;
do {
- err = __nfs4_proc_set_acl(inode, buf, buflen);
+ err = __nfs4_proc_set_acl(inode, buf, buflen, type);
trace_nfs4_set_acl(inode, err);
if (err == -NFS4ERR_BADOWNER || err == -NFS4ERR_BADNAME) {
/*
@@ -6443,7 +6595,9 @@ static void nfs4_delegreturn_release(void *calldata)
pnfs_roc_release(&data->lr.arg, &data->lr.res,
data->res.lr_ret);
if (inode) {
- nfs_post_op_update_inode_force_wcc(inode, &data->fattr);
+ nfs4_fattr_set_prechange(&data->fattr,
+ inode_peek_iversion_raw(inode));
+ nfs_refresh_inode(inode, &data->fattr);
nfs_iput_and_deactive(inode);
}
kfree(calldata);
@@ -6454,7 +6608,7 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data)
struct nfs4_delegreturndata *d_data;
struct pnfs_layout_hdr *lo;
- d_data = (struct nfs4_delegreturndata *)data;
+ d_data = data;
if (!d_data->lr.roc && nfs4_wait_on_layoutreturn(d_data->inode, task)) {
nfs4_sequence_done(task, &d_data->res.seq_res);
@@ -6492,11 +6646,14 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred,
.rpc_client = server->client,
.rpc_message = &msg,
.callback_ops = &nfs4_delegreturn_ops,
- .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT | RPC_TASK_MOVEABLE,
+ .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT,
};
int status = 0;
- data = kzalloc(sizeof(*data), GFP_NOFS);
+ if (nfs_server_capable(inode, NFS_CAP_MOVEABLE))
+ task_setup_data.flags |= RPC_TASK_MOVEABLE;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
if (data == NULL)
return -ENOMEM;
@@ -6507,8 +6664,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred,
data->args.fhandle = &data->fh;
data->args.stateid = &data->stateid;
nfs4_bitmask_set(data->args.bitmask_store,
- server->cache_consistency_bitmask, inode, server,
- NULL);
+ server->cache_consistency_bitmask, inode, 0);
data->args.bitmask = data->args.bitmask_store;
nfs_copy_fh(&data->fh, NFS_FH(inode));
nfs4_stateid_copy(&data->stateid, stateid);
@@ -6685,7 +6841,7 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
struct nfs4_state *state = lsp->ls_state;
struct inode *inode = state->inode;
- p = kzalloc(sizeof(*p), GFP_NOFS);
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
if (p == NULL)
return NULL;
p->arg.fh = NFS_FH(inode);
@@ -6810,10 +6966,8 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
.workqueue = nfsiod_workqueue,
.flags = RPC_TASK_ASYNC,
};
- struct nfs_client *client =
- NFS_SERVER(lsp->ls_state->inode)->nfs_client;
- if (client->cl_minorversion)
+ if (nfs_server_capable(lsp->ls_state->inode, NFS_CAP_MOVEABLE))
task_setup_data.flags |= RPC_TASK_MOVEABLE;
nfs4_state_protect(NFS_SERVER(lsp->ls_state->inode)->nfs_client,
@@ -6984,6 +7138,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
{
struct nfs4_lockdata *data = calldata;
struct nfs4_lock_state *lsp = data->lsp;
+ struct nfs_server *server = NFS_SERVER(d_inode(data->ctx->dentry));
if (!nfs4_sequence_done(task, &data->res.seq_res))
return;
@@ -6991,8 +7146,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
data->rpc_status = task->tk_status;
switch (task->tk_status) {
case 0:
- renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)),
- data->timestamp);
+ renew_lease(server, data->timestamp);
if (data->arg.new_lock && !data->cancelled) {
data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS);
if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0)
@@ -7013,6 +7167,8 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
if (!nfs4_stateid_match(&data->arg.open_stateid,
&lsp->ls_state->open_stateid))
goto out_restart;
+ else if (nfs4_async_handle_error(task, server, lsp->ls_state, NULL) == -EAGAIN)
+ goto out_restart;
} else if (!nfs4_stateid_match(&data->arg.lock_stateid,
&lsp->ls_stateid))
goto out_restart;
@@ -7084,14 +7240,12 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
.flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF,
};
int ret;
- struct nfs_client *client = NFS_SERVER(state->inode)->nfs_client;
- if (client->cl_minorversion)
+ if (nfs_server_capable(state->inode, NFS_CAP_MOVEABLE))
task_setup_data.flags |= RPC_TASK_MOVEABLE;
data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file),
- fl->fl_u.nfs4_fl.owner,
- recovery_type == NFS_LOCK_NEW ? GFP_KERNEL : GFP_NOFS);
+ fl->fl_u.nfs4_fl.owner, GFP_KERNEL);
if (data == NULL)
return -ENOMEM;
if (IS_SETLKW(cmd))
@@ -7255,7 +7409,8 @@ nfs4_retry_setlk_simple(struct nfs4_state *state, int cmd,
status = nfs4_proc_setlk(state, cmd, request);
if ((status != -EAGAIN) || IS_SETLK(cmd))
break;
- freezable_schedule_timeout_interruptible(timeout);
+ __set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
+ schedule_timeout(timeout);
timeout *= 2;
timeout = min_t(unsigned long, NFS4_LOCK_MAXTIMEOUT, timeout);
status = -ERESTARTSYS;
@@ -7323,10 +7478,8 @@ nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
break;
status = -ERESTARTSYS;
- freezer_do_not_count();
- wait_woken(&waiter.wait, TASK_INTERRUPTIBLE,
+ wait_woken(&waiter.wait, TASK_INTERRUPTIBLE|TASK_FREEZABLE,
NFS4_LOCK_MAXTIMEOUT);
- freezer_count();
} while (!signalled());
remove_wait_queue(q, &waiter.wait);
@@ -7514,7 +7667,7 @@ nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp)
if (server->nfs_client->cl_mvops->minor_version != 0)
return;
- data = kmalloc(sizeof(*data), GFP_NOFS);
+ data = kmalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return;
data->lsp = lsp;
@@ -7537,21 +7690,70 @@ static int nfs4_xattr_set_nfs4_acl(const struct xattr_handler *handler,
const char *key, const void *buf,
size_t buflen, int flags)
{
- return nfs4_proc_set_acl(inode, buf, buflen);
+ return nfs4_proc_set_acl(inode, buf, buflen, NFS4ACL_ACL);
}
static int nfs4_xattr_get_nfs4_acl(const struct xattr_handler *handler,
struct dentry *unused, struct inode *inode,
const char *key, void *buf, size_t buflen)
{
- return nfs4_proc_get_acl(inode, buf, buflen);
+ return nfs4_proc_get_acl(inode, buf, buflen, NFS4ACL_ACL);
}
static bool nfs4_xattr_list_nfs4_acl(struct dentry *dentry)
{
- return nfs4_server_supports_acls(NFS_SERVER(d_inode(dentry)));
+ return nfs4_server_supports_acls(NFS_SB(dentry->d_sb), NFS4ACL_ACL);
}
+#if defined(CONFIG_NFS_V4_1)
+#define XATTR_NAME_NFSV4_DACL "system.nfs4_dacl"
+
+/*
+ * xattr ->set handler for XATTR_NAME_NFSV4_DACL: forwards the buffer to
+ * nfs4_proc_set_acl() with the DACL type. @handler, @mnt_userns, @unused,
+ * @key and @flags are not consulted.
+ */
+static int nfs4_xattr_set_nfs4_dacl(const struct xattr_handler *handler,
+				    struct user_namespace *mnt_userns,
+				    struct dentry *unused, struct inode *inode,
+				    const char *key, const void *buf,
+				    size_t buflen, int flags)
+{
+	const enum nfs4_acl_type acl_type = NFS4ACL_DACL;
+
+	return nfs4_proc_set_acl(inode, buf, buflen, acl_type);
+}
+
+/*
+ * xattr ->get handler for XATTR_NAME_NFSV4_DACL: returns whatever
+ * nfs4_proc_get_acl() yields for the DACL type. @handler, @unused and @key
+ * are not consulted.
+ */
+static int nfs4_xattr_get_nfs4_dacl(const struct xattr_handler *handler,
+				    struct dentry *unused, struct inode *inode,
+				    const char *key, void *buf, size_t buflen)
+{
+	const enum nfs4_acl_type acl_type = NFS4ACL_DACL;
+
+	return nfs4_proc_get_acl(inode, buf, buflen, acl_type);
+}
+
+/*
+ * xattr ->list handler for XATTR_NAME_NFSV4_DACL: true when the server
+ * behind @dentry's superblock reports DACL support.
+ */
+static bool nfs4_xattr_list_nfs4_dacl(struct dentry *dentry)
+{
+	const struct nfs_server *server = NFS_SB(dentry->d_sb);
+
+	return nfs4_server_supports_acls(server, NFS4ACL_DACL);
+}
+
+#define XATTR_NAME_NFSV4_SACL "system.nfs4_sacl"
+
+/*
+ * xattr ->set handler for XATTR_NAME_NFSV4_SACL: forwards the buffer to
+ * nfs4_proc_set_acl() with the SACL type. @handler, @mnt_userns, @unused,
+ * @key and @flags are not consulted.
+ */
+static int nfs4_xattr_set_nfs4_sacl(const struct xattr_handler *handler,
+				    struct user_namespace *mnt_userns,
+				    struct dentry *unused, struct inode *inode,
+				    const char *key, const void *buf,
+				    size_t buflen, int flags)
+{
+	const enum nfs4_acl_type acl_type = NFS4ACL_SACL;
+
+	return nfs4_proc_set_acl(inode, buf, buflen, acl_type);
+}
+
+/*
+ * xattr ->get handler for XATTR_NAME_NFSV4_SACL: returns whatever
+ * nfs4_proc_get_acl() yields for the SACL type. @handler, @unused and @key
+ * are not consulted.
+ */
+static int nfs4_xattr_get_nfs4_sacl(const struct xattr_handler *handler,
+				    struct dentry *unused, struct inode *inode,
+				    const char *key, void *buf, size_t buflen)
+{
+	const enum nfs4_acl_type acl_type = NFS4ACL_SACL;
+
+	return nfs4_proc_get_acl(inode, buf, buflen, acl_type);
+}
+
+/*
+ * xattr ->list handler for XATTR_NAME_NFSV4_SACL: true when the server
+ * behind @dentry's superblock reports SACL support.
+ */
+static bool nfs4_xattr_list_nfs4_sacl(struct dentry *dentry)
+{
+	const struct nfs_server *server = NFS_SB(dentry->d_sb);
+
+	return nfs4_server_supports_acls(server, NFS4ACL_SACL);
+}
+
+#endif
+
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
static int nfs4_xattr_set_nfs4_label(const struct xattr_handler *handler,
@@ -7611,7 +7813,7 @@ static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler,
const char *key, const void *buf,
size_t buflen, int flags)
{
- struct nfs_access_entry cache;
+ u32 mask;
int ret;
if (!nfs_server_capable(inode, NFS_CAP_XATTR))
@@ -7626,8 +7828,8 @@ static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler,
* do a cached access check for the XA* flags to possibly avoid
* doing an RPC and getting EACCES back.
*/
- if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) {
- if (!(cache.mask & NFS_ACCESS_XAWRITE))
+ if (!nfs_access_get_cached(inode, current_cred(), &mask, true)) {
+ if (!(mask & NFS_ACCESS_XAWRITE))
return -EACCES;
}
@@ -7648,14 +7850,14 @@ static int nfs4_xattr_get_nfs4_user(const struct xattr_handler *handler,
struct dentry *unused, struct inode *inode,
const char *key, void *buf, size_t buflen)
{
- struct nfs_access_entry cache;
+ u32 mask;
ssize_t ret;
if (!nfs_server_capable(inode, NFS_CAP_XATTR))
return -EOPNOTSUPP;
- if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) {
- if (!(cache.mask & NFS_ACCESS_XAREAD))
+ if (!nfs_access_get_cached(inode, current_cred(), &mask, true)) {
+ if (!(mask & NFS_ACCESS_XAREAD))
return -EACCES;
}
@@ -7680,13 +7882,13 @@ nfs4_listxattr_nfs4_user(struct inode *inode, char *list, size_t list_len)
ssize_t ret, size;
char *buf;
size_t buflen;
- struct nfs_access_entry cache;
+ u32 mask;
if (!nfs_server_capable(inode, NFS_CAP_XATTR))
return 0;
- if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) {
- if (!(cache.mask & NFS_ACCESS_XALIST))
+ if (!nfs_access_get_cached(inode, current_cred(), &mask, true)) {
+ if (!(mask & NFS_ACCESS_XALIST))
return 0;
}
@@ -7784,7 +7986,7 @@ static int _nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir,
else
bitmask[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
- nfs_fattr_init(&fs_locations->fattr);
+ nfs_fattr_init(fs_locations->fattr);
fs_locations->server = server;
fs_locations->nlocations = 0;
status = nfs4_call_sync(client, server, &msg, &args.seq_args, &res.seq_res, 0);
@@ -7818,18 +8020,18 @@ int nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir,
* appended to this compound to identify the client ID which is
* performing recovery.
*/
-static int _nfs40_proc_get_locations(struct inode *inode,
+static int _nfs40_proc_get_locations(struct nfs_server *server,
+ struct nfs_fh *fhandle,
struct nfs4_fs_locations *locations,
struct page *page, const struct cred *cred)
{
- struct nfs_server *server = NFS_SERVER(inode);
struct rpc_clnt *clnt = server->client;
u32 bitmask[2] = {
[0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS,
};
struct nfs4_fs_locations_arg args = {
.clientid = server->nfs_client->cl_clientid,
- .fh = NFS_FH(inode),
+ .fh = fhandle,
.page = page,
.bitmask = bitmask,
.migration = 1, /* skip LOOKUP */
@@ -7849,7 +8051,7 @@ static int _nfs40_proc_get_locations(struct inode *inode,
unsigned long now = jiffies;
int status;
- nfs_fattr_init(&locations->fattr);
+ nfs_fattr_init(locations->fattr);
locations->server = server;
locations->nlocations = 0;
@@ -7875,17 +8077,17 @@ static int _nfs40_proc_get_locations(struct inode *inode,
* When the client supports GETATTR(fs_locations_info), it can
* be plumbed in here.
*/
-static int _nfs41_proc_get_locations(struct inode *inode,
+static int _nfs41_proc_get_locations(struct nfs_server *server,
+ struct nfs_fh *fhandle,
struct nfs4_fs_locations *locations,
struct page *page, const struct cred *cred)
{
- struct nfs_server *server = NFS_SERVER(inode);
struct rpc_clnt *clnt = server->client;
u32 bitmask[2] = {
[0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS,
};
struct nfs4_fs_locations_arg args = {
- .fh = NFS_FH(inode),
+ .fh = fhandle,
.page = page,
.bitmask = bitmask,
.migration = 1, /* skip LOOKUP */
@@ -7900,15 +8102,26 @@ static int _nfs41_proc_get_locations(struct inode *inode,
.rpc_resp = &res,
.rpc_cred = cred,
};
+ struct nfs4_call_sync_data data = {
+ .seq_server = server,
+ .seq_args = &args.seq_args,
+ .seq_res = &res.seq_res,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = clnt,
+ .rpc_message = &msg,
+ .callback_ops = server->nfs_client->cl_mvops->call_sync_ops,
+ .callback_data = &data,
+ .flags = RPC_TASK_NO_ROUND_ROBIN,
+ };
int status;
- nfs_fattr_init(&locations->fattr);
+ nfs_fattr_init(locations->fattr);
locations->server = server;
locations->nlocations = 0;
nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 1);
- status = nfs4_call_sync_sequence(clnt, server, &msg,
- &args.seq_args, &res.seq_res);
+ status = nfs4_call_sync_custom(&task_setup_data);
if (status == NFS4_OK &&
res.seq_res.sr_status_flags & SEQ4_STATUS_LEASE_MOVED)
status = -NFS4ERR_LEASE_MOVED;
@@ -7919,7 +8132,8 @@ static int _nfs41_proc_get_locations(struct inode *inode,
/**
* nfs4_proc_get_locations - discover locations for a migrated FSID
- * @inode: inode on FSID that is migrating
+ * @server: pointer to nfs_server to process
+ * @fhandle: pointer to the kernel NFS client file handle
* @locations: result of query
* @page: buffer
* @cred: credential to use for this operation
@@ -7934,11 +8148,11 @@ static int _nfs41_proc_get_locations(struct inode *inode,
* -NFS4ERR_LEASE_MOVED is returned if the server still has leases
* from this client that require migration recovery.
*/
-int nfs4_proc_get_locations(struct inode *inode,
+int nfs4_proc_get_locations(struct nfs_server *server,
+ struct nfs_fh *fhandle,
struct nfs4_fs_locations *locations,
struct page *page, const struct cred *cred)
{
- struct nfs_server *server = NFS_SERVER(inode);
struct nfs_client *clp = server->nfs_client;
const struct nfs4_mig_recovery_ops *ops =
clp->cl_mvops->mig_recovery_ops;
@@ -7951,10 +8165,11 @@ int nfs4_proc_get_locations(struct inode *inode,
(unsigned long long)server->fsid.major,
(unsigned long long)server->fsid.minor,
clp->cl_hostname);
- nfs_display_fhandle(NFS_FH(inode), __func__);
+ nfs_display_fhandle(fhandle, __func__);
do {
- status = ops->get_locations(inode, locations, page, cred);
+ status = ops->get_locations(server, fhandle, locations, page,
+ cred);
if (status != -NFS4ERR_DELAY)
break;
nfs4_handle_exception(server, status, &exception);
@@ -8219,6 +8434,7 @@ nfs4_bind_one_conn_to_session_done(struct rpc_task *task, void *calldata)
case -NFS4ERR_DEADSESSION:
nfs4_schedule_session_recovery(clp->cl_session,
task->tk_status);
+ return;
}
if (args->dir == NFS4_CDFC4_FORE_OR_BOTH &&
res->dir != NFS4_CDFS4_BOTH) {
@@ -8686,7 +8902,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cred)
void nfs4_test_session_trunk(struct rpc_clnt *clnt, struct rpc_xprt *xprt,
void *data)
{
- struct nfs4_add_xprt_data *adata = (struct nfs4_add_xprt_data *)data;
+ struct nfs4_add_xprt_data *adata = data;
struct rpc_task *task;
int status;
@@ -8709,6 +8925,9 @@ void nfs4_test_session_trunk(struct rpc_clnt *clnt, struct rpc_xprt *xprt,
if (status == 0)
rpc_clnt_xprt_switch_add_xprt(clnt, xprt);
+ else if (rpc_clnt_xprt_switch_has_addr(clnt,
+ (struct sockaddr *)&xprt->addr))
+ rpc_clnt_xprt_switch_remove_xprt(clnt, xprt);
rpc_put_task(task);
}
@@ -9033,6 +9252,13 @@ int nfs4_proc_create_session(struct nfs_client *clp, const struct cred *cred)
int status;
unsigned *ptr;
struct nfs4_session *session = clp->cl_session;
+ struct nfs4_add_xprt_data xprtdata = {
+ .clp = clp,
+ };
+ struct rpc_add_xprt_test rpcdata = {
+ .add_xprt_test = clp->cl_mvops->session_trunk,
+ .data = &xprtdata,
+ };
dprintk("--> %s clp=%p session=%p\n", __func__, clp, session);
@@ -9049,6 +9275,7 @@ int nfs4_proc_create_session(struct nfs_client *clp, const struct cred *cred)
ptr = (unsigned *)&session->sess_id.data[0];
dprintk("%s client>seqid %d sessionid %u:%u:%u:%u\n", __func__,
clp->cl_seqid, ptr[0], ptr[1], ptr[2], ptr[3]);
+ rpc_clnt_probe_trunked_xprts(clp->cl_rpcclient, &rpcdata);
out:
return status;
}
@@ -9078,6 +9305,7 @@ int nfs4_proc_destroy_session(struct nfs4_session *session,
if (status)
dprintk("NFS: Got error %d from the server on DESTROY_SESSION. "
"Session has been destroyed regardless...\n", status);
+ rpc_clnt_manage_trunked_xprts(session->clp->cl_rpcclient);
return status;
}
@@ -9177,7 +9405,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
goto out_err;
ret = ERR_PTR(-ENOMEM);
- calldata = kzalloc(sizeof(*calldata), GFP_NOFS);
+ calldata = kzalloc(sizeof(*calldata), GFP_KERNEL);
if (calldata == NULL)
goto out_put_clp;
nfs4_init_sequence(&calldata->args, &calldata->res, 0, is_privileged);
@@ -9262,6 +9490,9 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf
rpc_delay(task, NFS4_POLL_RETRY_MAX);
fallthrough;
case -NFS4ERR_RETRY_UNCACHED_REP:
+ case -EACCES:
+ dprintk("%s: failed to reclaim complete error %d for server %s, retrying\n",
+ __func__, task->tk_status, clp->cl_hostname);
return -EAGAIN;
case -NFS4ERR_BADSESSION:
case -NFS4ERR_DEADSESSION:
@@ -9493,6 +9724,8 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout)
nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0, 0);
task = rpc_run_task(&task_setup_data);
+ if (IS_ERR(task))
+ return ERR_CAST(task);
status = rpc_wait_for_completion_task(task);
if (status != 0)
@@ -10108,7 +10341,7 @@ static int nfs41_free_stateid(struct nfs_server *server,
&task_setup.rpc_client, &msg);
dprintk("NFS call free_stateid %p\n", stateid);
- data = kmalloc(sizeof(*data), GFP_NOFS);
+ data = kmalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
data->server = server;
@@ -10257,7 +10490,8 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = {
| NFS_CAP_POSIX_LOCK
| NFS_CAP_STATEID_NFSV41
| NFS_CAP_ATOMIC_OPEN_V1
- | NFS_CAP_LGOPEN,
+ | NFS_CAP_LGOPEN
+ | NFS_CAP_MOVEABLE,
.init_client = nfs41_init_client,
.shutdown_client = nfs41_shutdown_client,
.match_stateid = nfs41_match_stateid,
@@ -10292,7 +10526,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = {
| NFS_CAP_LAYOUTSTATS
| NFS_CAP_CLONE
| NFS_CAP_LAYOUTERROR
- | NFS_CAP_READ_PLUS,
+ | NFS_CAP_READ_PLUS
+ | NFS_CAP_MOVEABLE,
.init_client = nfs41_init_client,
.shutdown_client = nfs41_shutdown_client,
.match_stateid = nfs41_match_stateid,
@@ -10347,6 +10582,24 @@ static ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size)
return error + error2 + error3;
}
+/*
+ * ->enable_swap hook: schedule the state manager for @inode's nfs_client.
+ */
+static void nfs4_enable_swap(struct inode *inode)
+{
+	/*
+	 * The state manager thread has to be running at all times; it will
+	 * see that the client is a swapper and stay around.
+	 */
+	nfs4_schedule_state_manager(NFS_SERVER(inode)->nfs_client);
+}
+
+/*
+ * ->disable_swap hook: wake anything waiting on the cl_state word of
+ * @inode's nfs_client.
+ */
+static void nfs4_disable_swap(struct inode *inode)
+{
+	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+
+	/* Once woken, the state manager thread will now exit. */
+	wake_up_var(&clp->cl_state);
+}
+
static const struct inode_operations nfs4_dir_inode_operations = {
.create = nfs_create,
.lookup = nfs_lookup,
@@ -10423,6 +10676,9 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
.free_client = nfs4_free_client,
.create_server = nfs4_create_server,
.clone_server = nfs_clone_server,
+ .discover_trunking = nfs4_discover_trunking,
+ .enable_swap = nfs4_enable_swap,
+ .disable_swap = nfs4_disable_swap,
};
static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
@@ -10432,6 +10688,22 @@ static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
.set = nfs4_xattr_set_nfs4_acl,
};
+#if defined(CONFIG_NFS_V4_1)
+/* Handler table entry wiring XATTR_NAME_NFSV4_DACL to its callbacks. */
+static const struct xattr_handler nfs4_xattr_nfs4_dacl_handler = {
+	.name	= XATTR_NAME_NFSV4_DACL,
+	.get	= nfs4_xattr_get_nfs4_dacl,
+	.set	= nfs4_xattr_set_nfs4_dacl,
+	.list	= nfs4_xattr_list_nfs4_dacl,
+};
+
+/* Handler table entry wiring XATTR_NAME_NFSV4_SACL to its callbacks. */
+static const struct xattr_handler nfs4_xattr_nfs4_sacl_handler = {
+	.name	= XATTR_NAME_NFSV4_SACL,
+	.get	= nfs4_xattr_get_nfs4_sacl,
+	.set	= nfs4_xattr_set_nfs4_sacl,
+	.list	= nfs4_xattr_list_nfs4_sacl,
+};
+#endif
+
#ifdef CONFIG_NFS_V4_2
static const struct xattr_handler nfs4_xattr_nfs4_user_handler = {
.prefix = XATTR_USER_PREFIX,
@@ -10442,6 +10714,10 @@ static const struct xattr_handler nfs4_xattr_nfs4_user_handler = {
const struct xattr_handler *nfs4_xattr_handlers[] = {
&nfs4_xattr_nfs4_acl_handler,
+#if defined(CONFIG_NFS_V4_1)
+ &nfs4_xattr_nfs4_dacl_handler,
+ &nfs4_xattr_nfs4_sacl_handler,
+#endif
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
&nfs4_xattr_nfs4_label_handler,
#endif
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index f63dfa01001c..a2d2d5d1b088 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -49,6 +49,7 @@
#include <linux/workqueue.h>
#include <linux/bitops.h>
#include <linux/jiffies.h>
+#include <linux/sched/mm.h>
#include <linux/sunrpc/clnt.h>
@@ -496,8 +497,7 @@ nfs4_alloc_state_owner(struct nfs_server *server,
sp = kzalloc(sizeof(*sp), gfp_flags);
if (!sp)
return NULL;
- sp->so_seqid.owner_id = ida_simple_get(&server->openowner_id, 0, 0,
- gfp_flags);
+ sp->so_seqid.owner_id = ida_alloc(&server->openowner_id, gfp_flags);
if (sp->so_seqid.owner_id < 0) {
kfree(sp);
return NULL;
@@ -533,7 +533,7 @@ static void nfs4_free_state_owner(struct nfs4_state_owner *sp)
{
nfs4_destroy_seqid_counter(&sp->so_seqid);
put_cred(sp->so_cred);
- ida_simple_remove(&sp->so_server->openowner_id, sp->so_seqid.owner_id);
+ ida_free(&sp->so_server->openowner_id, sp->so_seqid.owner_id);
kfree(sp);
}
@@ -666,7 +666,7 @@ nfs4_alloc_open_state(void)
{
struct nfs4_state *state;
- state = kzalloc(sizeof(*state), GFP_NOFS);
+ state = kzalloc(sizeof(*state), GFP_KERNEL_ACCOUNT);
if (!state)
return NULL;
refcount_set(&state->count, 1);
@@ -820,7 +820,7 @@ static void __nfs4_close(struct nfs4_state *state,
void nfs4_close_state(struct nfs4_state *state, fmode_t fmode)
{
- __nfs4_close(state, fmode, GFP_NOFS, 0);
+ __nfs4_close(state, fmode, GFP_KERNEL, 0);
}
void nfs4_close_sync(struct nfs4_state *state, fmode_t fmode)
@@ -869,14 +869,14 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
struct nfs4_lock_state *lsp;
struct nfs_server *server = state->owner->so_server;
- lsp = kzalloc(sizeof(*lsp), GFP_NOFS);
+ lsp = kzalloc(sizeof(*lsp), GFP_KERNEL_ACCOUNT);
if (lsp == NULL)
return NULL;
nfs4_init_seqid_counter(&lsp->ls_seqid);
refcount_set(&lsp->ls_count, 1);
lsp->ls_state = state;
lsp->ls_owner = fl_owner;
- lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id, 0, 0, GFP_NOFS);
+ lsp->ls_seqid.owner_id = ida_alloc(&server->lockowner_id, GFP_KERNEL_ACCOUNT);
if (lsp->ls_seqid.owner_id < 0)
goto out_free;
INIT_LIST_HEAD(&lsp->ls_locks);
@@ -888,7 +888,7 @@ out_free:
void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp)
{
- ida_simple_remove(&server->lockowner_id, lsp->ls_seqid.owner_id);
+ ida_free(&server->lockowner_id, lsp->ls_seqid.owner_id);
nfs4_destroy_seqid_counter(&lsp->ls_seqid);
kfree(lsp);
}
@@ -1205,10 +1205,17 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
{
struct task_struct *task;
char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1];
+ struct rpc_clnt *cl = clp->cl_rpcclient;
+
+ while (cl != cl->cl_parent)
+ cl = cl->cl_parent;
set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
- if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
+ if (test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) != 0) {
+ wake_up_var(&clp->cl_state);
return;
+ }
+ set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
__module_get(THIS_MODULE);
refcount_inc(&clp->cl_count);
@@ -1224,6 +1231,7 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
printk(KERN_ERR "%s: kthread_run: %ld\n",
__func__, PTR_ERR(task));
nfs4_clear_state_manager_bit(clp);
+ clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
nfs_put_client(clp);
module_put(THIS_MODULE);
}
@@ -1304,7 +1312,8 @@ int nfs4_wait_clnt_recover(struct nfs_client *clp)
refcount_inc(&clp->cl_count);
res = wait_on_bit_action(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
- nfs_wait_bit_killable, TASK_KILLABLE);
+ nfs_wait_bit_killable,
+ TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
if (res)
goto out;
if (clp->cl_cons_state < 0)
@@ -1592,7 +1601,8 @@ static inline void nfs42_complete_copies(struct nfs4_state_owner *sp,
#endif /* CONFIG_NFS_V4_2 */
static int __nfs4_reclaim_open_state(struct nfs4_state_owner *sp, struct nfs4_state *state,
- const struct nfs4_state_recovery_ops *ops)
+ const struct nfs4_state_recovery_ops *ops,
+ int *lost_locks)
{
struct nfs4_lock_state *lock;
int status;
@@ -1610,7 +1620,7 @@ static int __nfs4_reclaim_open_state(struct nfs4_state_owner *sp, struct nfs4_st
list_for_each_entry(lock, &state->lock_states, ls_locks) {
trace_nfs4_state_lock_reclaim(state, lock);
if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags))
- pr_warn_ratelimited("NFS: %s: Lock reclaim failed!\n", __func__);
+ *lost_locks += 1;
}
spin_unlock(&state->state_lock);
}
@@ -1620,7 +1630,9 @@ static int __nfs4_reclaim_open_state(struct nfs4_state_owner *sp, struct nfs4_st
return status;
}
-static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs4_state_recovery_ops *ops)
+static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp,
+ const struct nfs4_state_recovery_ops *ops,
+ int *lost_locks)
{
struct nfs4_state *state;
unsigned int loop = 0;
@@ -1656,7 +1668,7 @@ restart:
#endif /* CONFIG_NFS_V4_2 */
refcount_inc(&state->count);
spin_unlock(&sp->so_lock);
- status = __nfs4_reclaim_open_state(sp, state, ops);
+ status = __nfs4_reclaim_open_state(sp, state, ops, lost_locks);
switch (status) {
default:
@@ -1774,6 +1786,7 @@ static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp,
static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp)
{
+ set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
/* Mark all delegations for reclaim */
nfs_delegation_mark_reclaim(clp);
nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_reboot);
@@ -1899,6 +1912,7 @@ static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recov
struct rb_node *pos;
LIST_HEAD(freeme);
int status = 0;
+ int lost_locks = 0;
restart:
rcu_read_lock();
@@ -1918,8 +1932,11 @@ restart:
spin_unlock(&clp->cl_lock);
rcu_read_unlock();
- status = nfs4_reclaim_open_state(sp, ops);
+ status = nfs4_reclaim_open_state(sp, ops, &lost_locks);
if (status < 0) {
+ if (lost_locks)
+ pr_warn("NFS: %s: lost %d locks\n",
+ clp->cl_hostname, lost_locks);
set_bit(ops->owner_flag_bit, &sp->so_flags);
nfs4_put_state_owner(sp);
status = nfs4_recovery_handle_error(clp, status);
@@ -1933,6 +1950,9 @@ restart:
}
rcu_read_unlock();
nfs4_free_state_owners(&freeme);
+ if (lost_locks)
+ pr_warn("NFS: %s: lost %d locks\n",
+ clp->cl_hostname, lost_locks);
return 0;
}
@@ -2096,9 +2116,15 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred
dprintk("<-- %s: no memory\n", __func__);
goto out;
}
+ locations->fattr = nfs_alloc_fattr();
+ if (locations->fattr == NULL) {
+ dprintk("<-- %s: no memory\n", __func__);
+ goto out;
+ }
inode = d_inode(server->super->s_root);
- result = nfs4_proc_get_locations(inode, locations, page, cred);
+ result = nfs4_proc_get_locations(server, NFS_FH(inode), locations,
+ page, cred);
if (result) {
dprintk("<-- %s: failed to retrieve fs_locations: %d\n",
__func__, result);
@@ -2106,7 +2132,10 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred
}
result = -NFS4ERR_NXIO;
- if (!(locations->fattr.valid & NFS_ATTR_FATTR_V4_LOCATIONS)) {
+ if (!locations->nlocations)
+ goto out;
+
+ if (!(locations->fattr->valid & NFS_ATTR_FATTR_V4_LOCATIONS)) {
dprintk("<-- %s: No fs_locations data, migration skipped\n",
__func__);
goto out;
@@ -2131,6 +2160,8 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred
out:
if (page != NULL)
__free_page(page);
+ if (locations != NULL)
+ kfree(locations->fattr);
kfree(locations);
if (result) {
pr_err("NFS: migration recovery failed (server %s)\n",
@@ -2556,9 +2587,17 @@ static void nfs4_layoutreturn_any_run(struct nfs_client *clp)
static void nfs4_state_manager(struct nfs_client *clp)
{
+ unsigned int memflags;
int status = 0;
const char *section = "", *section_sep = "";
+ /*
+ * State recovery can deadlock if the direct reclaim code tries
+ * to start NFS writeback. So ensure memory allocations are all
+ * GFP_NOFS.
+ */
+ memflags = memalloc_nofs_save();
+
/* Ensure exclusive access to NFSv4 state */
do {
trace_nfs4_state_mgr(clp);
@@ -2632,6 +2671,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
if (status < 0)
goto out_error;
nfs4_state_end_reclaim_reboot(clp);
+ continue;
}
/* Detect expired delegations... */
@@ -2653,6 +2693,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state);
}
+ memalloc_nofs_restore(memflags);
nfs4_end_drain_session(clp);
nfs4_clear_state_manager_bit(clp);
@@ -2665,11 +2706,8 @@ static void nfs4_state_manager(struct nfs_client *clp)
clear_bit(NFS4CLNT_RECALL_RUNNING, &clp->cl_state);
}
- /* Did we race with an attempt to give us more work? */
- if (!test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state))
- return;
- if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
- return;
+ return;
+
} while (refcount_read(&clp->cl_count) > 1 && !signalled());
goto out_drain;
@@ -2682,6 +2720,7 @@ out_error:
clp->cl_hostname, -status);
ssleep(1);
out_drain:
+ memalloc_nofs_restore(memflags);
nfs4_end_drain_session(clp);
nfs4_clear_state_manager_bit(clp);
}
@@ -2689,10 +2728,32 @@ out_drain:
static int nfs4_run_state_manager(void *ptr)
{
struct nfs_client *clp = ptr;
+ struct rpc_clnt *cl = clp->cl_rpcclient;
+
+ while (cl != cl->cl_parent)
+ cl = cl->cl_parent;
allow_signal(SIGKILL);
+again:
+ set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
nfs4_state_manager(clp);
+ if (atomic_read(&cl->cl_swapper)) {
+ wait_var_event_interruptible(&clp->cl_state,
+ test_bit(NFS4CLNT_RUN_MANAGER,
+ &clp->cl_state));
+ if (atomic_read(&cl->cl_swapper) &&
+ test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state))
+ goto again;
+ /* Either no longer a swapper, or were signalled */
+ }
+ clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
+
+ if (refcount_read(&clp->cl_count) > 1 && !signalled() &&
+ test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state) &&
+ !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state))
+ goto again;
+
nfs_put_client(clp);
- module_put_and_exit(0);
+ module_put_and_kthread_exit(0);
return 0;
}
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 6ee6ad3674a2..2cff5901c689 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -2097,6 +2097,7 @@ TRACE_EVENT(ff_layout_commit_error,
)
);
+#ifdef CONFIG_NFS_V4_2
TRACE_DEFINE_ENUM(NFS4_CONTENT_DATA);
TRACE_DEFINE_ENUM(NFS4_CONTENT_HOLE);
@@ -2105,7 +2106,6 @@ TRACE_DEFINE_ENUM(NFS4_CONTENT_HOLE);
{ NFS4_CONTENT_DATA, "DATA" }, \
{ NFS4_CONTENT_HOLE, "HOLE" })
-#ifdef CONFIG_NFS_V4_2
TRACE_EVENT(nfs4_llseek,
TP_PROTO(
const struct inode *inode,
@@ -2496,6 +2496,54 @@ TRACE_EVENT(nfs4_offload_cancel,
__entry->stateid_seq, __entry->stateid_hash
)
);
+
+DECLARE_EVENT_CLASS(nfs4_xattr_event,
+ TP_PROTO(
+ const struct inode *inode,
+ const char *name,
+ int error
+ ),
+
+ TP_ARGS(inode, name, error),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, error)
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
+ __string(name, name)
+ ),
+
+ TP_fast_assign(
+ __entry->error = error < 0 ? -error : 0;
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->fileid = NFS_FILEID(inode);
+ __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+ __assign_str(name, name);
+ ),
+
+ TP_printk(
+ "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "name=%s",
+ -__entry->error, show_nfs4_status(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle, __get_str(name)
+ )
+);
+#define DEFINE_NFS4_XATTR_EVENT(name) \
+ DEFINE_EVENT(nfs4_xattr_event, name, \
+ TP_PROTO( \
+ const struct inode *inode, \
+ const char *name, \
+ int error \
+ ), \
+ TP_ARGS(inode, name, error))
+DEFINE_NFS4_XATTR_EVENT(nfs4_getxattr);
+DEFINE_NFS4_XATTR_EVENT(nfs4_setxattr);
+DEFINE_NFS4_XATTR_EVENT(nfs4_removexattr);
+
+DEFINE_NFS4_INODE_EVENT(nfs4_listxattr);
#endif /* CONFIG_NFS_V4_2 */
#endif /* CONFIG_NFS_V4_1 */
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 69862bf6db00..acfe5f4bda48 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1605,7 +1605,8 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
FATTR4_WORD0_RDATTR_ERROR,
FATTR4_WORD1_MOUNTED_ON_FILEID,
};
- uint32_t dircount = readdir->count >> 1;
+ uint32_t dircount = readdir->count;
+ uint32_t maxcount = readdir->count;
__be32 *p, verf[2];
uint32_t attrlen = 0;
unsigned int i;
@@ -1618,7 +1619,6 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
FATTR4_WORD1_SPACE_USED|FATTR4_WORD1_TIME_ACCESS|
FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
attrs[2] |= FATTR4_WORD2_SECURITY_LABEL;
- dircount >>= 1;
}
/* Use mounted_on_fileid only if the server supports it */
if (!(readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID))
@@ -1634,7 +1634,7 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
encode_nfs4_verifier(xdr, &readdir->verifier);
p = reserve_space(xdr, 12 + (attrlen << 2));
*p++ = cpu_to_be32(dircount);
- *p++ = cpu_to_be32(readdir->count);
+ *p++ = cpu_to_be32(maxcount);
*p++ = cpu_to_be32(attrlen);
for (i = 0; i < attrlen; i++)
*p++ = cpu_to_be32(attrs[i]);
@@ -1680,19 +1680,35 @@ encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
encode_op_hdr(xdr, OP_RESTOREFH, decode_restorefh_maxsz, hdr);
}
-static void
-encode_setacl(struct xdr_stream *xdr, const struct nfs_setaclargs *arg,
- struct compound_hdr *hdr)
+static void nfs4_acltype_to_bitmap(enum nfs4_acl_type type, __u32 bitmap[2])
{
- __be32 *p;
+ switch (type) {
+ default:
+ bitmap[0] = FATTR4_WORD0_ACL;
+ bitmap[1] = 0;
+ break;
+ case NFS4ACL_DACL:
+ bitmap[0] = 0;
+ bitmap[1] = FATTR4_WORD1_DACL;
+ break;
+ case NFS4ACL_SACL:
+ bitmap[0] = 0;
+ bitmap[1] = FATTR4_WORD1_SACL;
+ }
+}
+
+static void encode_setacl(struct xdr_stream *xdr,
+ const struct nfs_setaclargs *arg,
+ struct compound_hdr *hdr)
+{
+ __u32 bitmap[2];
+
+ nfs4_acltype_to_bitmap(arg->acl_type, bitmap);
encode_op_hdr(xdr, OP_SETATTR, decode_setacl_maxsz, hdr);
encode_nfs4_stateid(xdr, &zero_stateid);
- p = reserve_space(xdr, 2*4);
- *p++ = cpu_to_be32(1);
- *p = cpu_to_be32(FATTR4_WORD0_ACL);
- p = reserve_space(xdr, 4);
- *p = cpu_to_be32(arg->acl_len);
+ xdr_encode_bitmap4(xdr, bitmap, ARRAY_SIZE(bitmap));
+ encode_uint32(xdr, arg->acl_len);
xdr_write_pages(xdr, arg->acl_pages, 0, arg->acl_len);
}
@@ -2587,11 +2603,11 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
- const __u32 nfs4_acl_bitmap[1] = {
- [0] = FATTR4_WORD0_ACL,
- };
+ __u32 nfs4_acl_bitmap[2];
uint32_t replen;
+ nfs4_acltype_to_bitmap(args->acl_type, nfs4_acl_bitmap);
+
encode_compound_hdr(xdr, req, &hdr);
encode_sequence(xdr, &args->seq_args, &hdr);
encode_putfh(xdr, args->fh, &hdr);
@@ -3533,6 +3549,42 @@ static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint
return 0;
}
+static int decode_attr_case_insensitive(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
+{
+ __be32 *p;
+
+ *res = 0;
+ if (unlikely(bitmap[0] & (FATTR4_WORD0_CASE_INSENSITIVE - 1U)))
+ return -EIO;
+ if (likely(bitmap[0] & FATTR4_WORD0_CASE_INSENSITIVE)) {
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ return -EIO;
+ *res = be32_to_cpup(p);
+ bitmap[0] &= ~FATTR4_WORD0_CASE_INSENSITIVE;
+ }
+ dprintk("%s: case_insensitive=%s\n", __func__, *res == 0 ? "false" : "true");
+ return 0;
+}
+
+static int decode_attr_case_preserving(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
+{
+ __be32 *p;
+
+ *res = 0;
+ if (unlikely(bitmap[0] & (FATTR4_WORD0_CASE_PRESERVING - 1U)))
+ return -EIO;
+ if (likely(bitmap[0] & FATTR4_WORD0_CASE_PRESERVING)) {
+ p = xdr_inline_decode(xdr, 4);
+ if (unlikely(!p))
+ return -EIO;
+ *res = be32_to_cpup(p);
+ bitmap[0] &= ~FATTR4_WORD0_CASE_PRESERVING;
+ }
+ dprintk("%s: case_preserving=%s\n", __func__, *res == 0 ? "false" : "true");
+ return 0;
+}
+
static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid)
{
__be32 *p;
@@ -3696,8 +3748,6 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
if (unlikely(!p))
goto out_eio;
n = be32_to_cpup(p);
- if (n <= 0)
- goto out_eio;
for (res->nlocations = 0; res->nlocations < n; res->nlocations++) {
u32 m;
struct nfs4_fs_location *loc;
@@ -4200,10 +4250,11 @@ static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap,
} else
printk(KERN_WARNING "%s: label too long (%u)!\n",
__func__, len);
+ if (label && label->label)
+ dprintk("%s: label=%.*s, len=%d, PI=%d, LFS=%d\n",
+ __func__, label->len, (char *)label->label,
+ label->len, label->pi, label->lfs);
}
- if (label && label->label)
- dprintk("%s: label=%s, len=%d, PI=%d, LFS=%d\n", __func__,
- (char *)label->label, label->len, label->pi, label->lfs);
return status;
}
@@ -4412,6 +4463,10 @@ static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_re
goto xdr_error;
if ((status = decode_attr_aclsupport(xdr, bitmap, &res->acl_bitmask)) != 0)
goto xdr_error;
+ if ((status = decode_attr_case_insensitive(xdr, bitmap, &res->case_insensitive)) != 0)
+ goto xdr_error;
+ if ((status = decode_attr_case_preserving(xdr, bitmap, &res->case_preserving)) != 0)
+ goto xdr_error;
if ((status = decode_attr_exclcreat_supported(xdr, bitmap,
res->exclcreat_bitmask)) != 0)
goto xdr_error;
@@ -5347,7 +5402,7 @@ decode_restorefh(struct xdr_stream *xdr)
}
static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
- struct nfs_getaclres *res)
+ struct nfs_getaclres *res, enum nfs4_acl_type type)
{
unsigned int savep;
uint32_t attrlen,
@@ -5365,26 +5420,39 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
goto out;
- if (unlikely(bitmap[0] & (FATTR4_WORD0_ACL - 1U)))
- return -EIO;
- if (likely(bitmap[0] & FATTR4_WORD0_ACL)) {
-
- /* The bitmap (xdr len + bitmaps) and the attr xdr len words
- * are stored with the acl data to handle the problem of
- * variable length bitmaps.*/
- res->acl_data_offset = xdr_page_pos(xdr);
- res->acl_len = attrlen;
-
- /* Check for receive buffer overflow */
- if (res->acl_len > xdr_stream_remaining(xdr) ||
- res->acl_len + res->acl_data_offset > xdr->buf->page_len) {
- res->acl_flags |= NFS4_ACL_TRUNC;
- dprintk("NFS: acl reply: attrlen %u > page_len %zu\n",
- attrlen, xdr_stream_remaining(xdr));
- }
- } else
- status = -EOPNOTSUPP;
+ switch (type) {
+ default:
+ if (unlikely(bitmap[0] & (FATTR4_WORD0_ACL - 1U)))
+ return -EIO;
+ if (!(bitmap[0] & FATTR4_WORD0_ACL))
+ return -EOPNOTSUPP;
+ break;
+ case NFS4ACL_DACL:
+ if (unlikely(bitmap[0] || bitmap[1] & (FATTR4_WORD1_DACL - 1U)))
+ return -EIO;
+ if (!(bitmap[1] & FATTR4_WORD1_DACL))
+ return -EOPNOTSUPP;
+ break;
+ case NFS4ACL_SACL:
+ if (unlikely(bitmap[0] || bitmap[1] & (FATTR4_WORD1_SACL - 1U)))
+ return -EIO;
+ if (!(bitmap[1] & FATTR4_WORD1_SACL))
+ return -EOPNOTSUPP;
+ }
+ /* The bitmap (xdr len + bitmaps) and the attr xdr len words
+ * are stored with the acl data to handle the problem of
+ * variable length bitmaps.*/
+ res->acl_data_offset = xdr_page_pos(xdr);
+ res->acl_len = attrlen;
+
+ /* Check for receive buffer overflow */
+ if (res->acl_len > xdr_stream_remaining(xdr) ||
+ res->acl_len + res->acl_data_offset > xdr->buf->page_len) {
+ res->acl_flags |= NFS4_ACL_TRUNC;
+ dprintk("NFS: acl reply: attrlen %u > page_len %zu\n",
+ attrlen, xdr_stream_remaining(xdr));
+ }
out:
return status;
}
@@ -6447,7 +6515,7 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
status = decode_putfh(xdr);
if (status)
goto out;
- status = decode_getacl(xdr, rqstp, res);
+ status = decode_getacl(xdr, rqstp, res, res->acl_type);
out:
return status;
@@ -7012,7 +7080,7 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req,
if (res->migration) {
xdr_enter_page(xdr, PAGE_SIZE);
status = decode_getfattr_generic(xdr,
- &res->fs_locations->fattr,
+ res->fs_locations->fattr,
NULL, res->fs_locations,
res->fs_locations->server);
if (status)
@@ -7025,7 +7093,7 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req,
goto out;
xdr_enter_page(xdr, PAGE_SIZE);
status = decode_getfattr_generic(xdr,
- &res->fs_locations->fattr,
+ res->fs_locations->fattr,
NULL, res->fs_locations,
res->fs_locations->server);
}
@@ -7469,7 +7537,6 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
if (entry->fattr->valid & NFS_ATTR_FATTR_TYPE)
entry->d_type = nfs_umode_to_dtype(entry->fattr->mode);
- entry->prev_cookie = entry->cookie;
entry->cookie = new_cookie;
return 0;
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index fa148308822c..620329b7e6ae 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -139,7 +139,7 @@ static int __init nfs_root_setup(char *line)
ROOT_DEV = Root_NFS;
if (line[0] == '/' || line[0] == ',' || (line[0] >= '0' && line[0] <= '9')) {
- strlcpy(nfs_root_parms, line, sizeof(nfs_root_parms));
+ strscpy(nfs_root_parms, line, sizeof(nfs_root_parms));
} else {
size_t n = strlen(line) + sizeof(NFS_ROOT) - 1;
if (n >= sizeof(nfs_root_parms))
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index b3aee261801e..8c6cc58679ff 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -21,7 +21,6 @@
{ NFS_INO_INVALID_ATIME, "INVALID_ATIME" }, \
{ NFS_INO_INVALID_ACCESS, "INVALID_ACCESS" }, \
{ NFS_INO_INVALID_ACL, "INVALID_ACL" }, \
- { NFS_INO_REVAL_PAGECACHE, "REVAL_PAGECACHE" }, \
{ NFS_INO_REVAL_FORCED, "REVAL_FORCED" }, \
{ NFS_INO_INVALID_LABEL, "INVALID_LABEL" }, \
{ NFS_INO_INVALID_CHANGE, "INVALID_CHANGE" }, \
@@ -37,12 +36,10 @@
#define nfs_show_nfsi_flags(v) \
__print_flags(v, "|", \
- { BIT(NFS_INO_ADVISE_RDPLUS), "ADVISE_RDPLUS" }, \
{ BIT(NFS_INO_STALE), "STALE" }, \
{ BIT(NFS_INO_ACL_LRU_SET), "ACL_LRU_SET" }, \
{ BIT(NFS_INO_INVALIDATING), "INVALIDATING" }, \
{ BIT(NFS_INO_FSCACHE), "FSCACHE" }, \
- { BIT(NFS_INO_FSCACHE_LOCK), "FSCACHE_LOCK" }, \
{ BIT(NFS_INO_LAYOUTCOMMIT), "NEED_LAYOUTCOMMIT" }, \
{ BIT(NFS_INO_LAYOUTCOMMITTING), "LAYOUTCOMMIT" }, \
{ BIT(NFS_INO_LAYOUTSTATS), "LAYOUTSTATS" }, \
@@ -163,6 +160,9 @@ DEFINE_NFS_INODE_EVENT(nfs_fsync_enter);
DEFINE_NFS_INODE_EVENT_DONE(nfs_fsync_exit);
DEFINE_NFS_INODE_EVENT(nfs_access_enter);
DEFINE_NFS_INODE_EVENT_DONE(nfs_set_cache_invalid);
+DEFINE_NFS_INODE_EVENT(nfs_readdir_force_readdirplus);
+DEFINE_NFS_INODE_EVENT_DONE(nfs_readdir_cache_fill_done);
+DEFINE_NFS_INODE_EVENT_DONE(nfs_readdir_uncached_done);
TRACE_EVENT(nfs_access_exit,
TP_PROTO(
@@ -274,6 +274,122 @@ DEFINE_NFS_UPDATE_SIZE_EVENT(wcc);
DEFINE_NFS_UPDATE_SIZE_EVENT(update);
DEFINE_NFS_UPDATE_SIZE_EVENT(grow);
+DECLARE_EVENT_CLASS(nfs_inode_range_event,
+ TP_PROTO(
+ const struct inode *inode,
+ loff_t range_start,
+ loff_t range_end
+ ),
+
+ TP_ARGS(inode, range_start, range_end),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
+ __field(u64, version)
+ __field(loff_t, range_start)
+ __field(loff_t, range_end)
+ ),
+
+ TP_fast_assign(
+ const struct nfs_inode *nfsi = NFS_I(inode);
+
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
+ __entry->fileid = nfsi->fileid;
+ __entry->version = inode_peek_iversion_raw(inode);
+ __entry->range_start = range_start;
+ __entry->range_end = range_end;
+ ),
+
+ TP_printk(
+ "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu "
+ "range=[%lld, %lld]",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle, __entry->version,
+ __entry->range_start, __entry->range_end
+ )
+);
+
+#define DEFINE_NFS_INODE_RANGE_EVENT(name) \
+ DEFINE_EVENT(nfs_inode_range_event, name, \
+ TP_PROTO( \
+ const struct inode *inode, \
+ loff_t range_start, \
+ loff_t range_end \
+ ), \
+ TP_ARGS(inode, range_start, range_end))
+
+DEFINE_NFS_INODE_RANGE_EVENT(nfs_readdir_invalidate_cache_range);
+
+DECLARE_EVENT_CLASS(nfs_readdir_event,
+ TP_PROTO(
+ const struct file *file,
+ const __be32 *verifier,
+ u64 cookie,
+ pgoff_t page_index,
+ unsigned int dtsize
+ ),
+
+ TP_ARGS(file, verifier, cookie, page_index, dtsize),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
+ __field(u64, version)
+ __array(char, verifier, NFS4_VERIFIER_SIZE)
+ __field(u64, cookie)
+ __field(pgoff_t, index)
+ __field(unsigned int, dtsize)
+ ),
+
+ TP_fast_assign(
+ const struct inode *dir = file_inode(file);
+ const struct nfs_inode *nfsi = NFS_I(dir);
+
+ __entry->dev = dir->i_sb->s_dev;
+ __entry->fileid = nfsi->fileid;
+ __entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
+ __entry->version = inode_peek_iversion_raw(dir);
+ if (cookie != 0)
+ memcpy(__entry->verifier, verifier,
+ NFS4_VERIFIER_SIZE);
+ else
+ memset(__entry->verifier, 0,
+ NFS4_VERIFIER_SIZE);
+ __entry->cookie = cookie;
+ __entry->index = page_index;
+ __entry->dtsize = dtsize;
+ ),
+
+ TP_printk(
+ "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu "
+ "cookie=%s:0x%llx cache_index=%lu dtsize=%u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid, __entry->fhandle,
+ __entry->version, show_nfs4_verifier(__entry->verifier),
+ (unsigned long long)__entry->cookie, __entry->index,
+ __entry->dtsize
+ )
+);
+
+#define DEFINE_NFS_READDIR_EVENT(name) \
+ DEFINE_EVENT(nfs_readdir_event, name, \
+ TP_PROTO( \
+ const struct file *file, \
+ const __be32 *verifier, \
+ u64 cookie, \
+ pgoff_t page_index, \
+ unsigned int dtsize \
+ ), \
+ TP_ARGS(file, verifier, cookie, page_index, dtsize))
+
+DEFINE_NFS_READDIR_EVENT(nfs_readdir_cache_fill);
+DEFINE_NFS_READDIR_EVENT(nfs_readdir_uncached);
+
DECLARE_EVENT_CLASS(nfs_lookup_event,
TP_PROTO(
const struct inode *dir,
@@ -367,6 +483,9 @@ DEFINE_NFS_LOOKUP_EVENT(nfs_lookup_enter);
DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_lookup_exit);
DEFINE_NFS_LOOKUP_EVENT(nfs_lookup_revalidate_enter);
DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_lookup_revalidate_exit);
+DEFINE_NFS_LOOKUP_EVENT(nfs_readdir_lookup);
+DEFINE_NFS_LOOKUP_EVENT(nfs_readdir_lookup_revalidate_failed);
+DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_readdir_lookup_revalidate);
TRACE_EVENT(nfs_atomic_open_enter,
TP_PROTO(
@@ -890,11 +1009,11 @@ TRACE_EVENT(nfs_aop_readpage_done,
TRACE_EVENT(nfs_aop_readahead,
TP_PROTO(
const struct inode *inode,
- struct page *page,
+ loff_t pos,
unsigned int nr_pages
),
- TP_ARGS(inode, page, nr_pages),
+ TP_ARGS(inode, pos, nr_pages),
TP_STRUCT__entry(
__field(dev_t, dev)
@@ -912,7 +1031,7 @@ TRACE_EVENT(nfs_aop_readahead,
__entry->fileid = nfsi->fileid;
__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
__entry->version = inode_peek_iversion_raw(inode);
- __entry->offset = page_index(page) << PAGE_SHIFT;
+ __entry->offset = pos;
__entry->nr_pages = nr_pages;
),
@@ -1018,7 +1137,7 @@ TRACE_EVENT(nfs_readpage_done,
__field(u32, arg_count)
__field(u32, res_count)
__field(bool, eof)
- __field(int, status)
+ __field(int, error)
),
TP_fast_assign(
@@ -1027,7 +1146,7 @@ TRACE_EVENT(nfs_readpage_done,
const struct nfs_fh *fh = hdr->args.fh ?
hdr->args.fh : &nfsi->fh;
- __entry->status = task->tk_status;
+ __entry->error = task->tk_status;
__entry->offset = hdr->args.offset;
__entry->arg_count = hdr->args.count;
__entry->res_count = hdr->res.count;
@@ -1038,14 +1157,13 @@ TRACE_EVENT(nfs_readpage_done,
),
TP_printk(
- "fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%lld count=%u res=%u status=%d%s",
+ "error=%d fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "offset=%lld count=%u res=%u%s", __entry->error,
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle,
(long long)__entry->offset, __entry->arg_count,
- __entry->res_count, __entry->status,
- __entry->eof ? " eof" : ""
+ __entry->res_count, __entry->eof ? " eof" : ""
)
);
@@ -1065,7 +1183,7 @@ TRACE_EVENT(nfs_readpage_short,
__field(u32, arg_count)
__field(u32, res_count)
__field(bool, eof)
- __field(int, status)
+ __field(int, error)
),
TP_fast_assign(
@@ -1074,7 +1192,7 @@ TRACE_EVENT(nfs_readpage_short,
const struct nfs_fh *fh = hdr->args.fh ?
hdr->args.fh : &nfsi->fh;
- __entry->status = task->tk_status;
+ __entry->error = task->tk_status;
__entry->offset = hdr->args.offset;
__entry->arg_count = hdr->args.count;
__entry->res_count = hdr->res.count;
@@ -1085,17 +1203,107 @@ TRACE_EVENT(nfs_readpage_short,
),
TP_printk(
- "fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%lld count=%u res=%u status=%d%s",
+ "error=%d fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "offset=%lld count=%u res=%u%s", __entry->error,
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle,
(long long)__entry->offset, __entry->arg_count,
- __entry->res_count, __entry->status,
- __entry->eof ? " eof" : ""
+ __entry->res_count, __entry->eof ? " eof" : ""
)
);
+DECLARE_EVENT_CLASS(nfs_fscache_page_event,
+ TP_PROTO(
+ const struct inode *inode,
+ struct page *page
+ ),
+
+ TP_ARGS(inode, page),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
+ __field(loff_t, offset)
+ ),
+
+ TP_fast_assign(
+ const struct nfs_inode *nfsi = NFS_I(inode);
+ const struct nfs_fh *fh = &nfsi->fh;
+
+ __entry->offset = page_index(page) << PAGE_SHIFT;
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->fileid = nfsi->fileid;
+ __entry->fhandle = nfs_fhandle_hash(fh);
+ ),
+
+ TP_printk(
+ "fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "offset=%lld",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+ (long long)__entry->offset
+ )
+);
+DECLARE_EVENT_CLASS(nfs_fscache_page_event_done,
+ TP_PROTO(
+ const struct inode *inode,
+ struct page *page,
+ int error
+ ),
+
+ TP_ARGS(inode, page, error),
+
+ TP_STRUCT__entry(
+ __field(int, error)
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
+ __field(loff_t, offset)
+ ),
+
+ TP_fast_assign(
+ const struct nfs_inode *nfsi = NFS_I(inode);
+ const struct nfs_fh *fh = &nfsi->fh;
+
+ __entry->offset = page_index(page) << PAGE_SHIFT;
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->fileid = nfsi->fileid;
+ __entry->fhandle = nfs_fhandle_hash(fh);
+ __entry->error = error;
+ ),
+
+ TP_printk(
+ "fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "offset=%lld error=%d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+ (long long)__entry->offset, __entry->error
+ )
+);
+#define DEFINE_NFS_FSCACHE_PAGE_EVENT(name) \
+ DEFINE_EVENT(nfs_fscache_page_event, name, \
+ TP_PROTO( \
+ const struct inode *inode, \
+ struct page *page \
+ ), \
+ TP_ARGS(inode, page))
+#define DEFINE_NFS_FSCACHE_PAGE_EVENT_DONE(name) \
+ DEFINE_EVENT(nfs_fscache_page_event_done, name, \
+ TP_PROTO( \
+ const struct inode *inode, \
+ struct page *page, \
+ int error \
+ ), \
+ TP_ARGS(inode, page, error))
+DEFINE_NFS_FSCACHE_PAGE_EVENT(nfs_fscache_read_page);
+DEFINE_NFS_FSCACHE_PAGE_EVENT_DONE(nfs_fscache_read_page_exit);
+DEFINE_NFS_FSCACHE_PAGE_EVENT(nfs_fscache_write_page);
+DEFINE_NFS_FSCACHE_PAGE_EVENT_DONE(nfs_fscache_write_page_exit);
+
TRACE_EVENT(nfs_pgio_error,
TP_PROTO(
const struct nfs_pgio_header *hdr,
@@ -1113,7 +1321,7 @@ TRACE_EVENT(nfs_pgio_error,
__field(u32, arg_count)
__field(u32, res_count)
__field(loff_t, pos)
- __field(int, status)
+ __field(int, error)
),
TP_fast_assign(
@@ -1122,7 +1330,7 @@ TRACE_EVENT(nfs_pgio_error,
const struct nfs_fh *fh = hdr->args.fh ?
hdr->args.fh : &nfsi->fh;
- __entry->status = error;
+ __entry->error = error;
__entry->offset = hdr->args.offset;
__entry->arg_count = hdr->args.count;
__entry->res_count = hdr->res.count;
@@ -1131,12 +1339,12 @@ TRACE_EVENT(nfs_pgio_error,
__entry->fhandle = nfs_fhandle_hash(fh);
),
- TP_printk("fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%lld count=%u res=%u pos=%llu status=%d",
+ TP_printk("error=%d fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "offset=%lld count=%u res=%u pos=%llu", __entry->error,
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid, __entry->fhandle,
(long long)__entry->offset, __entry->arg_count, __entry->res_count,
- __entry->pos, __entry->status
+ __entry->pos
)
);
@@ -1196,7 +1404,7 @@ TRACE_EVENT(nfs_writeback_done,
__field(loff_t, offset)
__field(u32, arg_count)
__field(u32, res_count)
- __field(int, status)
+ __field(int, error)
__field(unsigned long, stable)
__array(char, verifier, NFS4_VERIFIER_SIZE)
),
@@ -1208,7 +1416,7 @@ TRACE_EVENT(nfs_writeback_done,
hdr->args.fh : &nfsi->fh;
const struct nfs_writeverf *verf = hdr->res.verf;
- __entry->status = task->tk_status;
+ __entry->error = task->tk_status;
__entry->offset = hdr->args.offset;
__entry->arg_count = hdr->args.count;
__entry->res_count = hdr->res.count;
@@ -1222,14 +1430,14 @@ TRACE_EVENT(nfs_writeback_done,
),
TP_printk(
- "fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%lld count=%u res=%u status=%d stable=%s "
- "verifier=%s",
+ "error=%d fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "offset=%lld count=%u res=%u stable=%s "
+ "verifier=%s", __entry->error,
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle,
(long long)__entry->offset, __entry->arg_count,
- __entry->res_count, __entry->status,
+ __entry->res_count,
show_nfs_stable_how(__entry->stable),
show_nfs4_verifier(__entry->verifier)
)
@@ -1237,44 +1445,50 @@ TRACE_EVENT(nfs_writeback_done,
DECLARE_EVENT_CLASS(nfs_page_error_class,
TP_PROTO(
+ const struct inode *inode,
const struct nfs_page *req,
int error
),
- TP_ARGS(req, error),
+ TP_ARGS(inode, req, error),
TP_STRUCT__entry(
- __field(const void *, req)
- __field(pgoff_t, index)
- __field(unsigned int, offset)
- __field(unsigned int, pgbase)
- __field(unsigned int, bytes)
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
+ __field(loff_t, offset)
+ __field(unsigned int, count)
__field(int, error)
),
TP_fast_assign(
- __entry->req = req;
- __entry->index = req->wb_index;
- __entry->offset = req->wb_offset;
- __entry->pgbase = req->wb_pgbase;
- __entry->bytes = req->wb_bytes;
+ const struct nfs_inode *nfsi = NFS_I(inode);
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->fileid = nfsi->fileid;
+ __entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
+ __entry->offset = req_offset(req);
+ __entry->count = req->wb_bytes;
__entry->error = error;
),
TP_printk(
- "req=%p index=%lu offset=%u pgbase=%u bytes=%u error=%d",
- __entry->req, __entry->index, __entry->offset,
- __entry->pgbase, __entry->bytes, __entry->error
+ "error=%d fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "offset=%lld count=%u", __entry->error,
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle, __entry->offset,
+ __entry->count
)
);
#define DEFINE_NFS_PAGEERR_EVENT(name) \
DEFINE_EVENT(nfs_page_error_class, name, \
TP_PROTO( \
+ const struct inode *inode, \
const struct nfs_page *req, \
int error \
), \
- TP_ARGS(req, error))
+ TP_ARGS(inode, req, error))
DEFINE_NFS_PAGEERR_EVENT(nfs_write_error);
DEFINE_NFS_PAGEERR_EVENT(nfs_comp_error);
@@ -1331,7 +1545,7 @@ TRACE_EVENT(nfs_commit_done,
__field(u32, fhandle)
__field(u64, fileid)
__field(loff_t, offset)
- __field(int, status)
+ __field(int, error)
__field(unsigned long, stable)
__array(char, verifier, NFS4_VERIFIER_SIZE)
),
@@ -1343,7 +1557,7 @@ TRACE_EVENT(nfs_commit_done,
data->args.fh : &nfsi->fh;
const struct nfs_writeverf *verf = data->res.verf;
- __entry->status = task->tk_status;
+ __entry->error = task->tk_status;
__entry->offset = data->args.offset;
__entry->stable = verf->committed;
memcpy(__entry->verifier,
@@ -1355,17 +1569,83 @@ TRACE_EVENT(nfs_commit_done,
),
TP_printk(
- "fileid=%02x:%02x:%llu fhandle=0x%08x "
- "offset=%lld status=%d stable=%s verifier=%s",
+ "error=%d fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "offset=%lld stable=%s verifier=%s", __entry->error,
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle,
- (long long)__entry->offset, __entry->status,
+ (long long)__entry->offset,
show_nfs_stable_how(__entry->stable),
show_nfs4_verifier(__entry->verifier)
)
);
+#define nfs_show_direct_req_flags(v) \
+ __print_flags(v, "|", \
+ { NFS_ODIRECT_DO_COMMIT, "DO_COMMIT" }, \
+ { NFS_ODIRECT_RESCHED_WRITES, "RESCHED_WRITES" }, \
+ { NFS_ODIRECT_SHOULD_DIRTY, "SHOULD DIRTY" }, \
+ { NFS_ODIRECT_DONE, "DONE" } )
+
+DECLARE_EVENT_CLASS(nfs_direct_req_class,
+ TP_PROTO(
+ const struct nfs_direct_req *dreq
+ ),
+
+ TP_ARGS(dreq),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(u64, fileid)
+ __field(u32, fhandle)
+ __field(loff_t, offset)
+ __field(ssize_t, count)
+ __field(ssize_t, bytes_left)
+ __field(ssize_t, error)
+ __field(int, flags)
+ ),
+
+ TP_fast_assign(
+ const struct inode *inode = dreq->inode;
+ const struct nfs_inode *nfsi = NFS_I(inode);
+ const struct nfs_fh *fh = &nfsi->fh;
+
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->fileid = nfsi->fileid;
+ __entry->fhandle = nfs_fhandle_hash(fh);
+ __entry->offset = dreq->io_start;
+ __entry->count = dreq->count;
+ __entry->bytes_left = dreq->bytes_left;
+ __entry->error = dreq->error;
+ __entry->flags = dreq->flags;
+ ),
+
+ TP_printk(
+ "error=%zd fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "offset=%lld count=%zd bytes_left=%zd flags=%s",
+ __entry->error, MAJOR(__entry->dev),
+ MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle, __entry->offset,
+ __entry->count, __entry->bytes_left,
+ nfs_show_direct_req_flags(__entry->flags)
+ )
+);
+
+#define DEFINE_NFS_DIRECT_REQ_EVENT(name) \
+ DEFINE_EVENT(nfs_direct_req_class, name, \
+ TP_PROTO( \
+ const struct nfs_direct_req *dreq \
+ ), \
+ TP_ARGS(dreq))
+
+DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_commit_complete);
+DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_resched_write);
+DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_write_complete);
+DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_write_completion);
+DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_write_schedule_iovec);
+DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_write_reschedule_io);
+
TRACE_EVENT(nfs_fh_to_dentry,
TP_PROTO(
const struct super_block *sb,
@@ -1399,6 +1679,65 @@ TRACE_EVENT(nfs_fh_to_dentry,
)
);
+TRACE_EVENT(nfs_mount_assign,
+ TP_PROTO(
+ const char *option,
+ const char *value
+ ),
+
+ TP_ARGS(option, value),
+
+ TP_STRUCT__entry(
+ __string(option, option)
+ __string(value, value)
+ ),
+
+ TP_fast_assign(
+ __assign_str(option, option);
+ __assign_str(value, value);
+ ),
+
+ TP_printk("option %s=%s",
+ __get_str(option), __get_str(value)
+ )
+);
+
+TRACE_EVENT(nfs_mount_option,
+ TP_PROTO(
+ const struct fs_parameter *param
+ ),
+
+ TP_ARGS(param),
+
+ TP_STRUCT__entry(
+ __string(option, param->key)
+ ),
+
+ TP_fast_assign(
+ __assign_str(option, param->key);
+ ),
+
+ TP_printk("option %s", __get_str(option))
+);
+
+TRACE_EVENT(nfs_mount_path,
+ TP_PROTO(
+ const char *path
+ ),
+
+ TP_ARGS(path),
+
+ TP_STRUCT__entry(
+ __string(path, path)
+ ),
+
+ TP_fast_assign(
+ __assign_str(path, path);
+ ),
+
+ TP_printk("path='%s'", __get_str(path))
+);
+
DECLARE_EVENT_CLASS(nfs_xdr_event,
TP_PROTO(
const struct xdr_stream *xdr,
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index ad7f83dc9a2d..317cedfa52bf 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -90,10 +90,10 @@ void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos)
}
}
-static inline struct nfs_page *
-nfs_page_alloc(void)
+static inline struct nfs_page *nfs_page_alloc(void)
{
- struct nfs_page *p = kmem_cache_zalloc(nfs_page_cachep, GFP_KERNEL);
+ struct nfs_page *p =
+ kmem_cache_zalloc(nfs_page_cachep, nfs_io_gfp_mask());
if (p)
INIT_LIST_HEAD(&p->wb_list);
return p;
@@ -767,6 +767,9 @@ int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr,
.flags = RPC_TASK_ASYNC | flags,
};
+ if (nfs_server_capable(hdr->inode, NFS_CAP_MOVEABLE))
+ task_setup_data.flags |= RPC_TASK_MOVEABLE;
+
hdr->rw_ops->rw_initiate(hdr, &msg, rpc_ops, &task_setup_data, how);
dprintk("NFS: initiated pgio call "
@@ -892,7 +895,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
struct nfs_commit_info cinfo;
struct nfs_page_array *pg_array = &hdr->page_array;
unsigned int pagecount, pageused;
- gfp_t gfp_flags = GFP_KERNEL;
+ gfp_t gfp_flags = nfs_io_gfp_mask();
pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count);
pg_array->npages = pagecount;
@@ -979,7 +982,7 @@ nfs_pageio_alloc_mirrors(struct nfs_pageio_descriptor *desc,
desc->pg_mirrors_dynamic = NULL;
if (mirror_count == 1)
return desc->pg_mirrors_static;
- ret = kmalloc_array(mirror_count, sizeof(*ret), GFP_KERNEL);
+ ret = kmalloc_array(mirror_count, sizeof(*ret), nfs_io_gfp_mask());
if (ret != NULL) {
for (i = 0; i < mirror_count; i++)
nfs_pageio_mirror_init(&ret[i], desc->pg_bsize);
@@ -1218,6 +1221,7 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
do {
list_splice_init(&mirror->pg_list, &head);
+ mirror->pg_recoalesce = 0;
while (!list_empty(&head)) {
struct nfs_page *req;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 7c9090a28e5c..a5db5158c634 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -92,6 +92,17 @@ find_pnfs_driver(u32 id)
return local;
}
+const struct pnfs_layoutdriver_type *pnfs_find_layoutdriver(u32 id)
+{
+ return find_pnfs_driver(id);
+}
+
+void pnfs_put_layoutdriver(const struct pnfs_layoutdriver_type *ld)
+{
+ if (ld)
+ module_put(ld->owner);
+}
+
void
unset_pnfs_layoutdriver(struct nfs_server *nfss)
{
@@ -458,6 +469,7 @@ pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
pnfs_clear_lseg_state(lseg, lseg_list);
pnfs_clear_layoutreturn_info(lo);
pnfs_free_returned_lsegs(lo, lseg_list, &range, 0);
+ set_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags);
if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags) &&
!test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags))
pnfs_clear_layoutreturn_waitbit(lo);
@@ -698,6 +710,7 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
u32 seq)
{
struct pnfs_layout_segment *lseg, *next;
+ struct nfs_server *server = NFS_SERVER(lo->plh_inode);
int remaining = 0;
dprintk("%s:Begin lo %p\n", __func__, lo);
@@ -710,8 +723,10 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
"offset %llu length %llu\n", __func__,
lseg, lseg->pls_range.iomode, lseg->pls_seq,
lseg->pls_range.offset, lseg->pls_range.length);
- if (!mark_lseg_invalid(lseg, tmp_list))
- remaining++;
+ if (mark_lseg_invalid(lseg, tmp_list))
+ continue;
+ remaining++;
+ pnfs_lseg_cancel_io(server, lseg);
}
dprintk("%s:Return %i\n", __func__, remaining);
return remaining;
@@ -1233,7 +1248,7 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo,
int status = 0;
*pcred = NULL;
- lrp = kzalloc(sizeof(*lrp), GFP_NOFS);
+ lrp = kzalloc(sizeof(*lrp), nfs_io_gfp_mask());
if (unlikely(lrp == NULL)) {
status = -ENOMEM;
spin_lock(&ino->i_lock);
@@ -1896,7 +1911,7 @@ static int pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo)
pnfs_layoutcommit_inode(lo->plh_inode, false);
return wait_on_bit_action(&lo->plh_flags, NFS_LAYOUT_RETURN,
nfs_wait_bit_killable,
- TASK_KILLABLE);
+ TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
}
static void nfs_layoutget_begin(struct pnfs_layout_hdr *lo)
@@ -1906,8 +1921,9 @@ static void nfs_layoutget_begin(struct pnfs_layout_hdr *lo)
static void nfs_layoutget_end(struct pnfs_layout_hdr *lo)
{
- if (atomic_dec_and_test(&lo->plh_outstanding))
- wake_up_var(&lo->plh_outstanding);
+ if (atomic_dec_and_test(&lo->plh_outstanding) &&
+ test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags))
+ wake_up_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN);
}
static bool pnfs_is_first_layoutget(struct pnfs_layout_hdr *lo)
@@ -1989,6 +2005,7 @@ lookup_again:
lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
if (lo == NULL) {
spin_unlock(&ino->i_lock);
+ lseg = ERR_PTR(-ENOMEM);
trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
PNFS_UPDATE_LAYOUT_NOMEM);
goto out;
@@ -2013,11 +2030,11 @@ lookup_again:
* If the layout segment list is empty, but there are outstanding
* layoutget calls, then they might be subject to a layoutrecall.
*/
- if ((list_empty(&lo->plh_segs) || !pnfs_layout_is_valid(lo)) &&
+ if (test_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags) &&
atomic_read(&lo->plh_outstanding) != 0) {
spin_unlock(&ino->i_lock);
- lseg = ERR_PTR(wait_var_event_killable(&lo->plh_outstanding,
- !atomic_read(&lo->plh_outstanding)));
+ lseg = ERR_PTR(wait_on_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN,
+ TASK_KILLABLE));
if (IS_ERR(lseg))
goto out_put_layout_hdr;
pnfs_put_layout_hdr(lo);
@@ -2117,6 +2134,7 @@ lookup_again:
lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &stateid, &arg, gfp_flags);
if (!lgp) {
+ lseg = ERR_PTR(-ENOMEM);
trace_pnfs_update_layout(ino, pos, count, iomode, lo, NULL,
PNFS_UPDATE_LAYOUT_NOMEM);
nfs_layoutget_end(lo);
@@ -2139,6 +2157,12 @@ lookup_again:
case -ERECALLCONFLICT:
case -EAGAIN:
break;
+ case -ENODATA:
+ /* The server returned NFS4ERR_LAYOUTUNAVAILABLE */
+ pnfs_layout_set_fail_bit(
+ lo, pnfs_iomode_to_fail_bit(iomode));
+ lseg = NULL;
+ goto out_put_layout_hdr;
default:
if (!nfs_error_is_fatal(PTR_ERR(lseg))) {
pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
@@ -2206,7 +2230,7 @@ _pnfs_grab_empty_layout(struct inode *ino, struct nfs_open_context *ctx)
struct pnfs_layout_hdr *lo;
spin_lock(&ino->i_lock);
- lo = pnfs_find_alloc_layout(ino, ctx, GFP_KERNEL);
+ lo = pnfs_find_alloc_layout(ino, ctx, nfs_io_gfp_mask());
if (!lo)
goto out_unlock;
if (!test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags))
@@ -2249,8 +2273,8 @@ static void _lgopen_prepare_attached(struct nfs4_opendata *data,
lo = _pnfs_grab_empty_layout(ino, ctx);
if (!lo)
return;
- lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &current_stateid,
- &rng, GFP_KERNEL);
+ lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &current_stateid, &rng,
+ nfs_io_gfp_mask());
if (!lgp) {
pnfs_clear_first_layoutget(lo);
nfs_layoutget_end(lo);
@@ -2275,8 +2299,8 @@ static void _lgopen_prepare_floating(struct nfs4_opendata *data,
};
struct nfs4_layoutget *lgp;
- lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &current_stateid,
- &rng, GFP_KERNEL);
+ lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &current_stateid, &rng,
+ nfs_io_gfp_mask());
if (!lgp)
return;
data->lgp = lgp;
@@ -2394,7 +2418,8 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
goto out_forget;
}
- if (!pnfs_layout_is_valid(lo) && !pnfs_is_first_layoutget(lo))
+ if (test_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags) &&
+ !pnfs_is_first_layoutget(lo))
goto out_forget;
if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) {
@@ -2463,6 +2488,7 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
u32 seq)
{
struct pnfs_layout_segment *lseg, *next;
+ struct nfs_server *server = NFS_SERVER(lo->plh_inode);
int remaining = 0;
dprintk("%s:Begin lo %p\n", __func__, lo);
@@ -2485,6 +2511,7 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
continue;
remaining++;
set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
+ pnfs_lseg_cancel_io(server, lseg);
}
if (remaining) {
@@ -2691,13 +2718,11 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r
else
rd_size = nfs_dreq_bytes_left(pgio->pg_dreq);
- pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
- nfs_req_openctx(req),
- req_offset(req),
- rd_size,
- IOMODE_READ,
- false,
- GFP_KERNEL);
+ pgio->pg_lseg =
+ pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req),
+ req_offset(req), rd_size,
+ IOMODE_READ, false,
+ nfs_io_gfp_mask());
if (IS_ERR(pgio->pg_lseg)) {
pgio->pg_error = PTR_ERR(pgio->pg_lseg);
pgio->pg_lseg = NULL;
@@ -2718,13 +2743,10 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
pnfs_generic_pg_check_layout(pgio);
pnfs_generic_pg_check_range(pgio, req);
if (pgio->pg_lseg == NULL) {
- pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
- nfs_req_openctx(req),
- req_offset(req),
- wb_size,
- IOMODE_RW,
- false,
- GFP_KERNEL);
+ pgio->pg_lseg =
+ pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req),
+ req_offset(req), wb_size, IOMODE_RW,
+ false, nfs_io_gfp_mask());
if (IS_ERR(pgio->pg_lseg)) {
pgio->pg_error = PTR_ERR(pgio->pg_lseg);
pgio->pg_lseg = NULL;
@@ -2800,7 +2822,6 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr)
/* Resend all requests through the MDS */
nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true,
hdr->completion_ops);
- set_bit(NFS_CONTEXT_RESEND_WRITES, &hdr->args.context->flags);
return nfs_pageio_resend(&pgio, hdr);
}
EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
@@ -3176,14 +3197,14 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
status = wait_on_bit_lock_action(&nfsi->flags,
NFS_INO_LAYOUTCOMMITTING,
nfs_wait_bit_killable,
- TASK_KILLABLE);
+ TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
if (status)
goto out;
}
status = -ENOMEM;
/* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */
- data = kzalloc(sizeof(*data), GFP_NOFS);
+ data = kzalloc(sizeof(*data), nfs_io_gfp_mask());
if (!data)
goto clear_layoutcommitting;
@@ -3250,7 +3271,7 @@ struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
{
struct nfs4_threshold *thp;
- thp = kzalloc(sizeof(*thp), GFP_NOFS);
+ thp = kzalloc(sizeof(*thp), nfs_io_gfp_mask());
if (!thp) {
dprintk("%s mdsthreshold allocation failed\n", __func__);
return NULL;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index f4d7548d67b2..e3e6a41f19de 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -105,6 +105,7 @@ enum {
NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */
NFS_LAYOUT_INODE_FREEING, /* The inode is being freed */
NFS_LAYOUT_HASHED, /* The layout visible */
+ NFS_LAYOUT_DRAIN,
};
enum layoutdriver_policy_flags {
@@ -168,6 +169,8 @@ struct pnfs_layoutdriver_type {
void (*cleanup_layoutcommit) (struct nfs4_layoutcommit_data *data);
int (*prepare_layoutcommit) (struct nfs4_layoutcommit_args *args);
int (*prepare_layoutstats) (struct nfs42_layoutstat_args *args);
+
+ void (*cancel_io)(struct pnfs_layout_segment *lseg);
};
struct pnfs_commit_ops {
@@ -234,6 +237,8 @@ struct pnfs_devicelist {
extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *);
extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *);
+extern const struct pnfs_layoutdriver_type *pnfs_find_layoutdriver(u32 id);
+extern void pnfs_put_layoutdriver(const struct pnfs_layoutdriver_type *ld);
/* nfs4proc.c */
extern size_t max_response_pages(struct nfs_server *server);
@@ -682,6 +687,13 @@ pnfs_lseg_request_intersecting(struct pnfs_layout_segment *lseg, struct nfs_page
req_offset(req), req_last);
}
+static inline void pnfs_lseg_cancel_io(struct nfs_server *server,
+ struct pnfs_layout_segment *lseg)
+{
+ if (server->pnfs_curr_ld->cancel_io)
+ server->pnfs_curr_ld->cancel_io(lseg);
+}
+
extern unsigned int layoutstats_timer;
#ifdef NFS_DEBUG
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 316f68f96e57..5d035dd2d7bf 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -374,12 +374,12 @@ pnfs_bucket_search_commit_reqs(struct pnfs_commit_bucket *buckets,
return NULL;
}
-/* pnfs_generic_search_commit_reqs - Search lists in @cinfo for the head reqest
+/* pnfs_generic_search_commit_reqs - Search lists in @cinfo for the head request
* for @page
* @cinfo - commit info for current inode
* @page - page to search for matching head request
*
- * Returns a the head request if one is found, otherwise returns NULL.
+ * Return: the head request if one is found, otherwise %NULL.
*/
struct nfs_page *
pnfs_generic_search_commit_reqs(struct nfs_commit_info *cinfo, struct page *page)
@@ -419,7 +419,7 @@ static struct nfs_commit_data *
pnfs_bucket_fetch_commitdata(struct pnfs_commit_bucket *bucket,
struct nfs_commit_info *cinfo)
{
- struct nfs_commit_data *data = nfs_commitdata_alloc(false);
+ struct nfs_commit_data *data = nfs_commitdata_alloc();
if (!data)
return NULL;
@@ -515,7 +515,11 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
unsigned int nreq = 0;
if (!list_empty(mds_pages)) {
- data = nfs_commitdata_alloc(true);
+ data = nfs_commitdata_alloc();
+ if (!data) {
+ nfs_retry_commit(mds_pages, NULL, cinfo, -1);
+ return -ENOMEM;
+ }
data->ds_commit_index = -1;
list_splice_init(mds_pages, &data->pages);
list_add_tail(&data->list, &list);
@@ -817,7 +821,7 @@ static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds)
static struct nfs_client *(*get_v3_ds_connect)(
struct nfs_server *mds_srv,
- const struct sockaddr *ds_addr,
+ const struct sockaddr_storage *ds_addr,
int ds_addrlen,
int ds_proto,
unsigned int ds_timeo,
@@ -878,7 +882,7 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
continue;
}
clp = get_v3_ds_connect(mds_srv,
- (struct sockaddr *)&da->da_addr,
+ &da->da_addr,
da->da_addrlen, da->da_transport,
timeo, retrans);
if (IS_ERR(clp))
@@ -947,7 +951,7 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
put_cred(xprtdata.cred);
} else {
clp = nfs4_set_ds_client(mds_srv,
- (struct sockaddr *)&da->da_addr,
+ &da->da_addr,
da->da_addrlen,
da->da_transport, timeo,
retrans, minor_version);
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 73dcaa99fa9b..e3570c656b0f 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -92,6 +92,7 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
info->maxfilesize = 0x7FFFFFFF;
info->lease_time = 0;
info->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED;
+ info->xattr_support = 0;
return 0;
}
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index d11af2a9299c..8ae2c8d1219d 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -120,12 +120,8 @@ static void nfs_readpage_release(struct nfs_page *req, int error)
if (nfs_error_is_fatal_on_server(error) && error != -ETIMEDOUT)
SetPageError(page);
if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) {
- struct address_space *mapping = page_file_mapping(page);
-
if (PageUptodate(page))
- nfs_readpage_to_fscache(inode, page, 0);
- else if (!PageError(page) && !PagePrivate(page))
- generic_error_remove_page(mapping, page);
+ nfs_fscache_write_page(inode, page);
unlock_page(page);
}
nfs_release_request(req);
@@ -194,10 +190,6 @@ static void nfs_initiate_read(struct nfs_pgio_header *hdr,
const struct nfs_rpc_ops *rpc_ops,
struct rpc_task_setup *task_setup_data, int how)
{
- struct inode *inode = hdr->inode;
- int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
-
- task_setup_data->flags |= swap_flags;
rpc_ops->read_setup(hdr, msg);
trace_nfs_initiate_read(hdr);
}
@@ -290,9 +282,8 @@ static void nfs_readpage_result(struct rpc_task *task,
}
static int
-readpage_async_filler(void *data, struct page *page)
+readpage_async_filler(struct nfs_readdesc *desc, struct page *page)
{
- struct nfs_readdesc *desc = data;
struct inode *inode = page_file_mapping(page)->host;
unsigned int rsize = NFS_SERVER(inode)->rsize;
struct nfs_page *new;
@@ -305,6 +296,12 @@ readpage_async_filler(void *data, struct page *page)
aligned_len = min_t(unsigned int, ALIGN(len, rsize), PAGE_SIZE);
+ if (!IS_SYNC(page->mapping->host)) {
+ error = nfs_fscache_read_page(page->mapping->host, page);
+ if (error == 0)
+ goto out_unlock;
+ }
+
new = nfs_create_request(desc->ctx, page, 0, aligned_len);
if (IS_ERR(new))
goto out_error;
@@ -320,6 +317,7 @@ readpage_async_filler(void *data, struct page *page)
return 0;
out_error:
error = PTR_ERR(new);
+out_unlock:
unlock_page(page);
out:
return error;
@@ -331,8 +329,9 @@ out:
* - The error flag is set for this page. This happens only when a
* previous async read operation failed.
*/
-int nfs_readpage(struct file *file, struct page *page)
+int nfs_read_folio(struct file *file, struct folio *folio)
{
+ struct page *page = &folio->page;
struct nfs_readdesc desc;
struct inode *inode = page_file_mapping(page)->host;
int ret;
@@ -366,12 +365,6 @@ int nfs_readpage(struct file *file, struct page *page)
desc.ctx = get_nfs_open_context(nfs_file_open_context(file));
xchg(&desc.ctx->error, 0);
- if (!IS_SYNC(inode)) {
- ret = nfs_readpage_from_fscache(desc.ctx, inode, page);
- if (ret == 0)
- goto out_wait;
- }
-
nfs_pageio_init_read(&desc.pgio, inode, false,
&nfs_async_read_completion_ops);
@@ -381,7 +374,6 @@ int nfs_readpage(struct file *file, struct page *page)
nfs_pageio_complete_read(&desc.pgio);
ret = desc.pgio.pg_error < 0 ? desc.pgio.pg_error : 0;
-out_wait:
if (!ret) {
ret = wait_on_page_locked_killable(page);
if (!PageUptodate(page) && !ret)
@@ -397,14 +389,16 @@ out_unlock:
return ret;
}
-int nfs_readpages(struct file *file, struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
+void nfs_readahead(struct readahead_control *ractl)
{
+ unsigned int nr_pages = readahead_count(ractl);
+ struct file *file = ractl->file;
struct nfs_readdesc desc;
- struct inode *inode = mapping->host;
+ struct inode *inode = ractl->mapping->host;
+ struct page *page;
int ret;
- trace_nfs_aop_readahead(inode, lru_to_page(pages), nr_pages);
+ trace_nfs_aop_readahead(inode, readahead_pos(ractl), nr_pages);
nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
ret = -ESTALE;
@@ -419,26 +413,21 @@ int nfs_readpages(struct file *file, struct address_space *mapping,
} else
desc.ctx = get_nfs_open_context(nfs_file_open_context(file));
- /* attempt to read as many of the pages as possible from the cache
- * - this returns -ENOBUFS immediately if the cookie is negative
- */
- ret = nfs_readpages_from_fscache(desc.ctx, inode, mapping,
- pages, &nr_pages);
- if (ret == 0)
- goto read_complete; /* all pages were read */
-
nfs_pageio_init_read(&desc.pgio, inode, false,
&nfs_async_read_completion_ops);
- ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
+ while ((page = readahead_page(ractl)) != NULL) {
+ ret = readpage_async_filler(&desc, page);
+ put_page(page);
+ if (ret)
+ break;
+ }
nfs_pageio_complete_read(&desc.pgio);
-read_complete:
put_nfs_open_context(desc.ctx);
out:
trace_nfs_aop_readahead_done(inode, nr_pages, ret);
- return ret;
}
int __init nfs_init_readpagecache(void)
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 3aced401735c..05ae23657527 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -149,7 +149,7 @@ int __init register_nfs_fs(void)
ret = nfs_register_sysctl();
if (ret < 0)
goto error_2;
- ret = register_shrinker(&acl_shrinker);
+ ret = register_shrinker(&acl_shrinker, "nfs-acl");
if (ret < 0)
goto error_3;
#ifdef CONFIG_NFS_V4_2
@@ -822,8 +822,7 @@ static int nfs_request_mount(struct fs_context *fc,
{
struct nfs_fs_context *ctx = nfs_fc2context(fc);
struct nfs_mount_request request = {
- .sap = (struct sockaddr *)
- &ctx->mount_server.address,
+ .sap = &ctx->mount_server._address,
.dirpath = ctx->nfs_server.export_path,
.protocol = ctx->mount_server.protocol,
.fh = root_fh,
@@ -854,7 +853,7 @@ static int nfs_request_mount(struct fs_context *fc,
* Construct the mount server's address.
*/
if (ctx->mount_server.address.sa_family == AF_UNSPEC) {
- memcpy(request.sap, &ctx->nfs_server.address,
+ memcpy(request.sap, &ctx->nfs_server._address,
ctx->nfs_server.addrlen);
ctx->mount_server.addrlen = ctx->nfs_server.addrlen;
}
@@ -1051,22 +1050,31 @@ static void nfs_fill_super(struct super_block *sb, struct nfs_fs_context *ctx)
if (ctx->bsize)
sb->s_blocksize = nfs_block_size(ctx->bsize, &sb->s_blocksize_bits);
- if (server->nfs_client->rpc_ops->version != 2) {
- /* The VFS shouldn't apply the umask to mode bits. We will do
- * so ourselves when necessary.
+ switch (server->nfs_client->rpc_ops->version) {
+ case 2:
+ sb->s_time_gran = 1000;
+ sb->s_time_min = 0;
+ sb->s_time_max = U32_MAX;
+ break;
+ case 3:
+ /*
+ * The VFS shouldn't apply the umask to mode bits.
+ * We will do so ourselves when necessary.
*/
sb->s_flags |= SB_POSIXACL;
sb->s_time_gran = 1;
- sb->s_export_op = &nfs_export_ops;
- } else
- sb->s_time_gran = 1000;
-
- if (server->nfs_client->rpc_ops->version != 4) {
sb->s_time_min = 0;
sb->s_time_max = U32_MAX;
- } else {
+ sb->s_export_op = &nfs_export_ops;
+ break;
+ case 4:
+ sb->s_flags |= SB_POSIXACL;
+ sb->s_time_gran = 1;
sb->s_time_min = S64_MIN;
sb->s_time_max = S64_MAX;
+ if (server->caps & NFS_CAP_ATOMIC_OPEN_V1)
+ sb->s_export_op = &nfs_export_ops;
+ break;
}
sb->s_magic = NFS_SUPER_MAGIC;
@@ -1204,42 +1212,42 @@ static int nfs_compare_super(struct super_block *sb, struct fs_context *fc)
}
#ifdef CONFIG_NFS_FSCACHE
-static void nfs_get_cache_cookie(struct super_block *sb,
- struct nfs_fs_context *ctx)
+static int nfs_get_cache_cookie(struct super_block *sb,
+ struct nfs_fs_context *ctx)
{
struct nfs_server *nfss = NFS_SB(sb);
char *uniq = NULL;
int ulen = 0;
- nfss->fscache_key = NULL;
nfss->fscache = NULL;
if (!ctx)
- return;
+ return 0;
if (ctx->clone_data.sb) {
struct nfs_server *mnt_s = NFS_SB(ctx->clone_data.sb);
if (!(mnt_s->options & NFS_OPTION_FSCACHE))
- return;
- if (mnt_s->fscache_key) {
- uniq = mnt_s->fscache_key->key.uniquifier;
- ulen = mnt_s->fscache_key->key.uniq_len;
+ return 0;
+ if (mnt_s->fscache_uniq) {
+ uniq = mnt_s->fscache_uniq;
+ ulen = strlen(uniq);
}
} else {
if (!(ctx->options & NFS_OPTION_FSCACHE))
- return;
+ return 0;
if (ctx->fscache_uniq) {
uniq = ctx->fscache_uniq;
ulen = strlen(ctx->fscache_uniq);
}
}
- nfs_fscache_get_super_cookie(sb, uniq, ulen);
+ return nfs_fscache_get_super_cookie(sb, uniq, ulen);
}
#else
-static void nfs_get_cache_cookie(struct super_block *sb,
- struct nfs_fs_context *ctx)
+static int nfs_get_cache_cookie(struct super_block *sb,
+ struct nfs_fs_context *ctx)
{
+ return 0;
}
#endif
@@ -1299,7 +1307,9 @@ int nfs_get_tree_common(struct fs_context *fc)
s->s_blocksize_bits = bsize;
s->s_blocksize = 1U << bsize;
}
- nfs_get_cache_cookie(s, ctx);
+ error = nfs_get_cache_cookie(s, ctx);
+ if (error < 0)
+ goto error_splat_super;
}
error = nfs_get_root(s, fc);
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 25ba299fdac2..0e27a2e4e68b 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -26,21 +26,21 @@
* and straight-forward than readdir caching.
*/
-static int nfs_symlink_filler(void *data, struct page *page)
+static int nfs_symlink_filler(struct file *file, struct folio *folio)
{
- struct inode *inode = data;
+ struct inode *inode = folio->mapping->host;
int error;
- error = NFS_PROTO(inode)->readlink(inode, page, 0, PAGE_SIZE);
+ error = NFS_PROTO(inode)->readlink(inode, &folio->page, 0, PAGE_SIZE);
if (error < 0)
goto error;
- SetPageUptodate(page);
- unlock_page(page);
+ folio_mark_uptodate(folio);
+ folio_unlock(folio);
return 0;
error:
- SetPageError(page);
- unlock_page(page);
+ folio_set_error(folio);
+ folio_unlock(folio);
return -EIO;
}
@@ -67,7 +67,7 @@ static const char *nfs_get_link(struct dentry *dentry,
if (err)
return err;
page = read_cache_page(&inode->i_data, 0, nfs_symlink_filler,
- inode);
+ NULL);
if (IS_ERR(page))
return ERR_CAST(page);
}
diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c
index 8cb70755e3c9..a6f740366963 100644
--- a/fs/nfs/sysfs.c
+++ b/fs/nfs/sysfs.c
@@ -142,10 +142,11 @@ static struct attribute *nfs_netns_client_attrs[] = {
&nfs_netns_client_id.attr,
NULL,
};
+ATTRIBUTE_GROUPS(nfs_netns_client);
static struct kobj_type nfs_netns_client_type = {
.release = nfs_netns_client_release,
- .default_attrs = nfs_netns_client_attrs,
+ .default_groups = nfs_netns_client_groups,
.sysfs_ops = &kobj_sysfs_ops,
.namespace = nfs_netns_client_namespace,
};
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 5fa11e1aca4c..9697cd5d2561 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -102,6 +102,10 @@ static void nfs_do_call_unlink(struct inode *inode, struct nfs_unlinkdata *data)
};
struct rpc_task *task;
struct inode *dir = d_inode(data->dentry->d_parent);
+
+ if (nfs_server_capable(inode, NFS_CAP_MOVEABLE))
+ task_setup_data.flags |= RPC_TASK_MOVEABLE;
+
nfs_sb_active(dir->i_sb);
data->args.fh = NFS_FH(dir);
nfs_fattr_init(data->res.dir_attr);
@@ -344,9 +348,14 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
.flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF,
};
+ if (nfs_server_capable(old_dir, NFS_CAP_MOVEABLE) &&
+ nfs_server_capable(new_dir, NFS_CAP_MOVEABLE))
+ task_setup_data.flags |= RPC_TASK_MOVEABLE;
+
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (data == NULL)
return ERR_PTR(-ENOMEM);
+ task_setup_data.task = &data->task;
task_setup_data.callback_data = data;
data->cred = get_current_cred();
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 9b7619ce17a7..f41d24b54fd1 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -70,27 +70,17 @@ static mempool_t *nfs_wdata_mempool;
static struct kmem_cache *nfs_cdata_cachep;
static mempool_t *nfs_commit_mempool;
-struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail)
+struct nfs_commit_data *nfs_commitdata_alloc(void)
{
struct nfs_commit_data *p;
- if (never_fail)
- p = mempool_alloc(nfs_commit_mempool, GFP_NOIO);
- else {
- /* It is OK to do some reclaim, not no safe to wait
- * for anything to be returned to the pool.
- * mempool_alloc() cannot handle that particular combination,
- * so we need two separate attempts.
- */
+ p = kmem_cache_zalloc(nfs_cdata_cachep, nfs_io_gfp_mask());
+ if (!p) {
p = mempool_alloc(nfs_commit_mempool, GFP_NOWAIT);
if (!p)
- p = kmem_cache_alloc(nfs_cdata_cachep, GFP_NOIO |
- __GFP_NOWARN | __GFP_NORETRY);
- if (!p)
return NULL;
+ memset(p, 0, sizeof(*p));
}
-
- memset(p, 0, sizeof(*p));
INIT_LIST_HEAD(&p->pages);
return p;
}
@@ -104,9 +94,15 @@ EXPORT_SYMBOL_GPL(nfs_commit_free);
static struct nfs_pgio_header *nfs_writehdr_alloc(void)
{
- struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_KERNEL);
+ struct nfs_pgio_header *p;
- memset(p, 0, sizeof(*p));
+ p = kmem_cache_zalloc(nfs_wdata_cachep, nfs_io_gfp_mask());
+ if (!p) {
+ p = mempool_alloc(nfs_wdata_mempool, GFP_NOWAIT);
+ if (!p)
+ return NULL;
+ memset(p, 0, sizeof(*p));
+ }
p->rw_mode = FMODE_WRITE;
return p;
}
@@ -294,6 +290,7 @@ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int c
nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
out:
spin_unlock(&inode->i_lock);
+ nfs_fscache_invalidate(inode, 0);
}
/* A writeback failed: mark the page as bad, and invalidate the page cache */
@@ -305,7 +302,7 @@ static void nfs_set_pageerror(struct address_space *mapping)
/* Force file size revalidation */
spin_lock(&inode->i_lock);
nfs_set_cache_invalid(inode, NFS_INO_REVAL_FORCED |
- NFS_INO_REVAL_PAGECACHE |
+ NFS_INO_INVALID_CHANGE |
NFS_INO_INVALID_SIZE);
spin_unlock(&inode->i_lock);
}
@@ -315,7 +312,10 @@ static void nfs_mapping_set_error(struct page *page, int error)
struct address_space *mapping = page_file_mapping(page);
SetPageError(page);
- mapping_set_error(mapping, error);
+ filemap_set_wb_err(mapping, error);
+ if (mapping->host)
+ errseq_set(&mapping->host->i_sb->s_wb_err,
+ error == -ENOSPC ? -ENOSPC : -EIO);
nfs_set_pageerror(mapping);
}
@@ -416,7 +416,7 @@ static void nfs_set_page_writeback(struct page *page)
if (atomic_long_inc_return(&nfss->writeback) >
NFS_CONGESTION_ON_THRESH)
- set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
+ nfss->write_congested = 1;
}
static void nfs_end_page_writeback(struct nfs_page *req)
@@ -432,7 +432,7 @@ static void nfs_end_page_writeback(struct nfs_page *req)
end_page_writeback(req->wb_page);
if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
- clear_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
+ nfss->write_congested = 0;
}
/*
@@ -592,7 +592,8 @@ nfs_lock_and_join_requests(struct page *page)
static void nfs_write_error(struct nfs_page *req, int error)
{
- trace_nfs_write_error(req, error);
+ trace_nfs_write_error(page_file_mapping(req->wb_page)->host, req,
+ error);
nfs_mapping_set_error(req->wb_page, error);
nfs_inode_remove_request(req);
nfs_end_page_writeback(req);
@@ -603,8 +604,9 @@ static void nfs_write_error(struct nfs_page *req, int error)
* Find an associated nfs write request, and prepare to flush it out
* May return an error if the user signalled nfs_wait_on_request().
*/
-static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
- struct page *page)
+static int nfs_page_async_flush(struct page *page,
+ struct writeback_control *wbc,
+ struct nfs_pageio_descriptor *pgio)
{
struct nfs_page *req;
int ret = 0;
@@ -630,11 +632,11 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
/*
* Remove the problematic req upon fatal errors on the server
*/
- if (nfs_error_is_fatal(ret)) {
- if (nfs_error_is_fatal_on_server(ret))
- goto out_launder;
- } else
- ret = -EAGAIN;
+ if (nfs_error_is_fatal_on_server(ret))
+ goto out_launder;
+ if (wbc->sync_mode == WB_SYNC_NONE)
+ ret = AOP_WRITEPAGE_ACTIVATE;
+ redirty_page_for_writepage(wbc, page);
nfs_redirty_request(req);
pgio->pg_error = 0;
} else
@@ -650,15 +652,8 @@ out_launder:
static int nfs_do_writepage(struct page *page, struct writeback_control *wbc,
struct nfs_pageio_descriptor *pgio)
{
- int ret;
-
nfs_pageio_cond_complete(pgio, page_index(page));
- ret = nfs_page_async_flush(pgio, page);
- if (ret == -EAGAIN) {
- redirty_page_for_writepage(wbc, page);
- ret = AOP_WRITEPAGE_ACTIVATE;
- }
- return ret;
+ return nfs_page_async_flush(page, wbc, pgio);
}
/*
@@ -671,17 +666,17 @@ static int nfs_writepage_locked(struct page *page,
struct inode *inode = page_file_mapping(page)->host;
int err;
+ if (wbc->sync_mode == WB_SYNC_NONE &&
+ NFS_SERVER(inode)->write_congested)
+ return AOP_WRITEPAGE_ACTIVATE;
+
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
nfs_pageio_init_write(&pgio, inode, 0,
false, &nfs_async_write_completion_ops);
err = nfs_do_writepage(page, wbc, &pgio);
pgio.pg_error = 0;
nfs_pageio_complete(&pgio);
- if (err < 0)
- return err;
- if (nfs_error_is_fatal(pgio.pg_error))
- return pgio.pg_error;
- return 0;
+ return err;
}
int nfs_writepage(struct page *page, struct writeback_control *wbc)
@@ -718,6 +713,10 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
int priority = 0;
int err;
+ if (wbc->sync_mode == WB_SYNC_NONE &&
+ NFS_SERVER(inode)->write_congested)
+ return 0;
+
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
if (!(mntflags & NFS_MOUNT_WRITE_EAGER) || wbc->for_kupdate ||
@@ -729,19 +728,19 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
priority = wb_priority(wbc);
}
- nfs_pageio_init_write(&pgio, inode, priority, false,
- &nfs_async_write_completion_ops);
- pgio.pg_io_completion = ioc;
- err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
- pgio.pg_error = 0;
- nfs_pageio_complete(&pgio);
+ do {
+ nfs_pageio_init_write(&pgio, inode, priority, false,
+ &nfs_async_write_completion_ops);
+ pgio.pg_io_completion = ioc;
+ err = write_cache_pages(mapping, wbc, nfs_writepages_callback,
+ &pgio);
+ pgio.pg_error = 0;
+ nfs_pageio_complete(&pgio);
+ } while (err < 0 && !nfs_error_is_fatal(err));
nfs_io_completion_put(ioc);
if (err < 0)
goto out_err;
- err = pgio.pg_error;
- if (nfs_error_is_fatal(err))
- goto out_err;
return 0;
out_err:
return err;
@@ -1002,7 +1001,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
nfs_list_remove_request(req);
if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
(hdr->good_bytes < bytes)) {
- trace_nfs_comp_error(req, hdr->error);
+ trace_nfs_comp_error(hdr->inode, req, hdr->error);
nfs_mapping_set_error(req->wb_page, hdr->error);
goto remove_req;
}
@@ -1408,6 +1407,8 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr,
{
int priority = flush_task_priority(how);
+ if (IS_SWAPFILE(hdr->inode))
+ task_setup_data->flags |= RPC_TASK_SWAPPER;
task_setup_data->priority = priority;
rpc_ops->write_setup(hdr, msg, &task_setup_data->rpc_client);
trace_nfs_initiate_write(hdr);
@@ -1419,10 +1420,12 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr,
*/
static void nfs_redirty_request(struct nfs_page *req)
{
+ struct nfs_inode *nfsi = NFS_I(page_file_mapping(req->wb_page)->host);
+
/* Bump the transmission count */
req->wb_nio++;
nfs_mark_request_dirty(req);
- set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags);
+ atomic_long_inc(&nfsi->redirtied_pages);
nfs_end_page_writeback(req);
nfs_release_request(req);
}
@@ -1434,7 +1437,7 @@ static void nfs_async_write_error(struct list_head *head, int error)
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
- if (nfs_error_is_fatal(error))
+ if (nfs_error_is_fatal_on_server(error))
nfs_write_error(req, error);
else
nfs_redirty_request(req);
@@ -1444,8 +1447,6 @@ static void nfs_async_write_error(struct list_head *head, int error)
static void nfs_async_write_reschedule_io(struct nfs_pgio_header *hdr)
{
nfs_async_write_error(&hdr->pages, 0);
- filemap_fdatawrite_range(hdr->inode->i_mapping, hdr->args.offset,
- hdr->args.offset + hdr->args.count - 1);
}
static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
@@ -1495,31 +1496,6 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata)
NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
}
-/*
- * Special version of should_remove_suid() that ignores capabilities.
- */
-static int nfs_should_remove_suid(const struct inode *inode)
-{
- umode_t mode = inode->i_mode;
- int kill = 0;
-
- /* suid always must be killed */
- if (unlikely(mode & S_ISUID))
- kill = ATTR_KILL_SUID;
-
- /*
- * sgid without any exec bits is just a mandatory locking mark; leave
- * it alone. If some exec bits are set, it's a real sgid; kill it.
- */
- if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
- kill |= ATTR_KILL_SGID;
-
- if (unlikely(kill && S_ISREG(mode)))
- return kill;
-
- return 0;
-}
-
static void nfs_writeback_check_extend(struct nfs_pgio_header *hdr,
struct nfs_fattr *fattr)
{
@@ -1576,25 +1552,37 @@ static int nfs_writeback_done(struct rpc_task *task,
nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count);
trace_nfs_writeback_done(task, hdr);
- if (hdr->res.verf->committed < hdr->args.stable &&
- task->tk_status >= 0) {
- /* We tried a write call, but the server did not
- * commit data to stable storage even though we
- * requested it.
- * Note: There is a known bug in Tru64 < 5.0 in which
- * the server reports NFS_DATA_SYNC, but performs
- * NFS_FILE_SYNC. We therefore implement this checking
- * as a dprintk() in order to avoid filling syslog.
- */
- static unsigned long complain;
+ if (task->tk_status >= 0) {
+ enum nfs3_stable_how committed = hdr->res.verf->committed;
+
+ if (committed == NFS_UNSTABLE) {
+ /*
+ * We have some uncommitted data on the server at
+ * this point, so ensure that we keep track of that
+ * fact irrespective of what later writes do.
+ */
+ set_bit(NFS_IOHDR_UNSTABLE_WRITES, &hdr->flags);
+ }
+
+ if (committed < hdr->args.stable) {
+ /* We tried a write call, but the server did not
+ * commit data to stable storage even though we
+ * requested it.
+ * Note: There is a known bug in Tru64 < 5.0 in which
+ * the server reports NFS_DATA_SYNC, but performs
+ * NFS_FILE_SYNC. We therefore implement this checking
+ * as a dprintk() in order to avoid filling syslog.
+ */
+ static unsigned long complain;
- /* Note this will print the MDS for a DS write */
- if (time_before(complain, jiffies)) {
- dprintk("NFS: faulty NFS server %s:"
- " (committed = %d) != (stable = %d)\n",
- NFS_SERVER(inode)->nfs_client->cl_hostname,
- hdr->res.verf->committed, hdr->args.stable);
- complain = jiffies + 300 * HZ;
+ /* Note this will print the MDS for a DS write */
+ if (time_before(complain, jiffies)) {
+ dprintk("NFS: faulty NFS server %s:"
+ " (committed = %d) != (stable = %d)\n",
+ NFS_SERVER(inode)->nfs_client->cl_hostname,
+ committed, hdr->args.stable);
+ complain = jiffies + 300 * HZ;
+ }
}
}
@@ -1709,6 +1697,10 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,
.flags = RPC_TASK_ASYNC | flags,
.priority = priority,
};
+
+ if (nfs_server_capable(data->inode, NFS_CAP_MOVEABLE))
+ task_setup_data.flags |= RPC_TASK_MOVEABLE;
+
/* Set up the initial task struct. */
nfs_ops->commit_setup(data, &msg, &task_setup_data.rpc_client);
trace_nfs_initiate_commit(data);
@@ -1820,7 +1812,11 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how,
if (list_empty(head))
return 0;
- data = nfs_commitdata_alloc(true);
+ data = nfs_commitdata_alloc();
+ if (!data) {
+ nfs_retry_commit(head, NULL, cinfo, -1);
+ return -ENOMEM;
+ }
/* Set up the argument struct */
nfs_init_commit(data, head, NULL, cinfo);
@@ -1864,7 +1860,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
(long long)req_offset(req));
if (status < 0) {
if (req->wb_page) {
- trace_nfs_commit_error(req, status);
+ trace_nfs_commit_error(data->inode, req,
+ status);
nfs_mapping_set_error(req->wb_page, status);
nfs_inode_remove_request(req);
}
@@ -1884,7 +1881,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
/* We have a mismatch. Write the page again */
dprintk_cont(" mismatch\n");
nfs_mark_request_dirty(req);
- set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags);
+ atomic_long_inc(&NFS_I(data->inode)->redirtied_pages);
next:
nfs_unlock_and_release_request(req);
/* Latency breaker */
@@ -1892,7 +1889,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
}
nfss = NFS_SERVER(data->inode);
if (atomic_long_read(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
- clear_bdi_congested(inode_to_bdi(data->inode), BLK_RW_ASYNC);
+ nfss->write_congested = 0;
nfs_init_cinfo(&cinfo, data->inode, data->dreq);
nfs_commit_end(cinfo.mds);
@@ -2048,21 +2045,21 @@ out:
}
EXPORT_SYMBOL_GPL(nfs_wb_all);
-int nfs_wb_page_cancel(struct inode *inode, struct page *page)
+int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio)
{
struct nfs_page *req;
int ret = 0;
- wait_on_page_writeback(page);
+ folio_wait_writeback(folio);
/* blocking call to cancel all requests and join to a single (head)
* request */
- req = nfs_lock_and_join_requests(page);
+ req = nfs_lock_and_join_requests(&folio->page);
if (IS_ERR(req)) {
ret = PTR_ERR(req);
} else if (req) {
- /* all requests from this page have been cancelled by
+ /* all requests from this folio have been cancelled by
* nfs_lock_and_join_requests, so just remove the head
* request from the inode / page_private pointer and
* release it */
@@ -2111,24 +2108,27 @@ out_error:
}
#ifdef CONFIG_MIGRATION
-int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
- struct page *page, enum migrate_mode mode)
+int nfs_migrate_folio(struct address_space *mapping, struct folio *dst,
+ struct folio *src, enum migrate_mode mode)
{
/*
- * If PagePrivate is set, then the page is currently associated with
+ * If the private flag is set, the folio is currently associated with
* an in-progress read or write request. Don't try to migrate it.
*
* FIXME: we could do this in principle, but we'll need a way to ensure
* that we can safely release the inode reference while holding
- * the page lock.
+ * the folio lock.
*/
- if (PagePrivate(page))
+ if (folio_test_private(src))
return -EBUSY;
- if (!nfs_fscache_release_page(page, GFP_KERNEL))
- return -EBUSY;
+ if (folio_test_fscache(src)) {
+ if (mode == MIGRATE_ASYNC)
+ return -EBUSY;
+ folio_wait_fscache(src);
+ }
- return migrate_page(mapping, newpage, page, mode);
+ return migrate_folio(mapping, dst, src, mode);
}
#endif