aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/fs/nfs
diff options
context:
space:
mode:
authorJames Morris <james.l.morris@oracle.com>2014-11-19 21:32:12 +1100
committerJames Morris <james.l.morris@oracle.com>2014-11-19 21:32:12 +1100
commitb10778a00d40b3d9fdaaf5891e802794781ff71c (patch)
tree6ba4cbac86eecedc3f30650e7f764ecf00c83898 /fs/nfs
parentintegrity: do zero padding of the key id (diff)
parentLinux 3.17 (diff)
downloadwireguard-linux-b10778a00d40b3d9fdaaf5891e802794781ff71c.tar.xz
wireguard-linux-b10778a00d40b3d9fdaaf5891e802794781ff71c.zip
Merge commit 'v3.17' into next
Diffstat (limited to '')
-rw-r--r--fs/nfs/blocklayout/blocklayout.c101
-rw-r--r--fs/nfs/callback.c12
-rw-r--r--fs/nfs/client.c107
-rw-r--r--fs/nfs/delegation.c34
-rw-r--r--fs/nfs/delegation.h1
-rw-r--r--fs/nfs/dir.c208
-rw-r--r--fs/nfs/direct.c33
-rw-r--r--fs/nfs/file.c4
-rw-r--r--fs/nfs/filelayout/filelayout.c299
-rw-r--r--fs/nfs/filelayout/filelayoutdev.c6
-rw-r--r--fs/nfs/getroot.c2
-rw-r--r--fs/nfs/inode.c18
-rw-r--r--fs/nfs/internal.h22
-rw-r--r--fs/nfs/netns.h3
-rw-r--r--fs/nfs/nfs3acl.c7
-rw-r--r--fs/nfs/nfs3proc.c21
-rw-r--r--fs/nfs/nfs4_fs.h21
-rw-r--r--fs/nfs/nfs4client.c43
-rw-r--r--fs/nfs/nfs4proc.c292
-rw-r--r--fs/nfs/nfs4state.c49
-rw-r--r--fs/nfs/nfs4trace.h28
-rw-r--r--fs/nfs/nfs4xdr.c2
-rw-r--r--fs/nfs/objlayout/objio_osd.c24
-rw-r--r--fs/nfs/objlayout/objlayout.c81
-rw-r--r--fs/nfs/objlayout/objlayout.h8
-rw-r--r--fs/nfs/pagelist.c330
-rw-r--r--fs/nfs/pnfs.c180
-rw-r--r--fs/nfs/pnfs.h45
-rw-r--r--fs/nfs/proc.c27
-rw-r--r--fs/nfs/read.c54
-rw-r--r--fs/nfs/super.c12
-rw-r--r--fs/nfs/write.c167
-rw-r--r--fs/nfs_common/nfsacl.c5
-rw-r--r--fs/nfsd/acl.h2
-rw-r--r--fs/nfsd/auth.c2
-rw-r--r--fs/nfsd/export.c6
-rw-r--r--fs/nfsd/export.h3
-rw-r--r--fs/nfsd/fault_inject.c138
-rw-r--r--fs/nfsd/netns.h23
-rw-r--r--fs/nfsd/nfs2acl.c8
-rw-r--r--fs/nfsd/nfs3acl.c8
-rw-r--r--fs/nfsd/nfs3proc.c9
-rw-r--r--fs/nfsd/nfs3xdr.c30
-rw-r--r--fs/nfsd/nfs4acl.c39
-rw-r--r--fs/nfsd/nfs4callback.c32
-rw-r--r--fs/nfsd/nfs4proc.c53
-rw-r--r--fs/nfsd/nfs4state.c3096
-rw-r--r--fs/nfsd/nfs4xdr.c145
-rw-r--r--fs/nfsd/nfscache.c13
-rw-r--r--fs/nfsd/nfsctl.c51
-rw-r--r--fs/nfsd/nfsfh.c12
-rw-r--r--fs/nfsd/nfsfh.h15
-rw-r--r--fs/nfsd/nfsproc.c13
-rw-r--r--fs/nfsd/nfssvc.c21
-rw-r--r--fs/nfsd/nfsxdr.c14
-rw-r--r--fs/nfsd/state.h220
-rw-r--r--fs/nfsd/vfs.c48
-rw-r--r--fs/nfsd/vfs.h8
-rw-r--r--fs/nfsd/xdr4.h30
59 files changed, 3930 insertions, 2355 deletions
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 9b431f44fad9..cbb1797149d5 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -210,8 +210,7 @@ static void bl_end_io_read(struct bio *bio, int err)
SetPageUptodate(bvec->bv_page);
if (err) {
- struct nfs_pgio_data *rdata = par->data;
- struct nfs_pgio_header *header = rdata->header;
+ struct nfs_pgio_header *header = par->data;
if (!header->pnfs_error)
header->pnfs_error = -EIO;
@@ -224,43 +223,44 @@ static void bl_end_io_read(struct bio *bio, int err)
static void bl_read_cleanup(struct work_struct *work)
{
struct rpc_task *task;
- struct nfs_pgio_data *rdata;
+ struct nfs_pgio_header *hdr;
dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work);
- rdata = container_of(task, struct nfs_pgio_data, task);
- pnfs_ld_read_done(rdata);
+ hdr = container_of(task, struct nfs_pgio_header, task);
+ pnfs_ld_read_done(hdr);
}
static void
bl_end_par_io_read(void *data, int unused)
{
- struct nfs_pgio_data *rdata = data;
+ struct nfs_pgio_header *hdr = data;
- rdata->task.tk_status = rdata->header->pnfs_error;
- INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
- schedule_work(&rdata->task.u.tk_work);
+ hdr->task.tk_status = hdr->pnfs_error;
+ INIT_WORK(&hdr->task.u.tk_work, bl_read_cleanup);
+ schedule_work(&hdr->task.u.tk_work);
}
static enum pnfs_try_status
-bl_read_pagelist(struct nfs_pgio_data *rdata)
+bl_read_pagelist(struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *header = rdata->header;
+ struct nfs_pgio_header *header = hdr;
int i, hole;
struct bio *bio = NULL;
struct pnfs_block_extent *be = NULL, *cow_read = NULL;
sector_t isect, extent_length = 0;
struct parallel_io *par;
- loff_t f_offset = rdata->args.offset;
- size_t bytes_left = rdata->args.count;
+ loff_t f_offset = hdr->args.offset;
+ size_t bytes_left = hdr->args.count;
unsigned int pg_offset, pg_len;
- struct page **pages = rdata->args.pages;
- int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;
+ struct page **pages = hdr->args.pages;
+ int pg_index = hdr->args.pgbase >> PAGE_CACHE_SHIFT;
const bool is_dio = (header->dreq != NULL);
dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
- rdata->pages.npages, f_offset, (unsigned int)rdata->args.count);
+ hdr->page_array.npages, f_offset,
+ (unsigned int)hdr->args.count);
- par = alloc_parallel(rdata);
+ par = alloc_parallel(hdr);
if (!par)
goto use_mds;
par->pnfs_callback = bl_end_par_io_read;
@@ -268,7 +268,7 @@ bl_read_pagelist(struct nfs_pgio_data *rdata)
isect = (sector_t) (f_offset >> SECTOR_SHIFT);
/* Code assumes extents are page-aligned */
- for (i = pg_index; i < rdata->pages.npages; i++) {
+ for (i = pg_index; i < hdr->page_array.npages; i++) {
if (!extent_length) {
/* We've used up the previous extent */
bl_put_extent(be);
@@ -317,7 +317,8 @@ bl_read_pagelist(struct nfs_pgio_data *rdata)
struct pnfs_block_extent *be_read;
be_read = (hole && cow_read) ? cow_read : be;
- bio = do_add_page_to_bio(bio, rdata->pages.npages - i,
+ bio = do_add_page_to_bio(bio,
+ hdr->page_array.npages - i,
READ,
isect, pages[i], be_read,
bl_end_io_read, par,
@@ -332,10 +333,10 @@ bl_read_pagelist(struct nfs_pgio_data *rdata)
extent_length -= PAGE_CACHE_SECTORS;
}
if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
- rdata->res.eof = 1;
- rdata->res.count = header->inode->i_size - rdata->args.offset;
+ hdr->res.eof = 1;
+ hdr->res.count = header->inode->i_size - hdr->args.offset;
} else {
- rdata->res.count = (isect << SECTOR_SHIFT) - rdata->args.offset;
+ hdr->res.count = (isect << SECTOR_SHIFT) - hdr->args.offset;
}
out:
bl_put_extent(be);
@@ -390,8 +391,7 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
}
if (unlikely(err)) {
- struct nfs_pgio_data *data = par->data;
- struct nfs_pgio_header *header = data->header;
+ struct nfs_pgio_header *header = par->data;
if (!header->pnfs_error)
header->pnfs_error = -EIO;
@@ -405,8 +405,7 @@ static void bl_end_io_write(struct bio *bio, int err)
{
struct parallel_io *par = bio->bi_private;
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
- struct nfs_pgio_data *data = par->data;
- struct nfs_pgio_header *header = data->header;
+ struct nfs_pgio_header *header = par->data;
if (!uptodate) {
if (!header->pnfs_error)
@@ -423,32 +422,32 @@ static void bl_end_io_write(struct bio *bio, int err)
static void bl_write_cleanup(struct work_struct *work)
{
struct rpc_task *task;
- struct nfs_pgio_data *wdata;
+ struct nfs_pgio_header *hdr;
dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work);
- wdata = container_of(task, struct nfs_pgio_data, task);
- if (likely(!wdata->header->pnfs_error)) {
+ hdr = container_of(task, struct nfs_pgio_header, task);
+ if (likely(!hdr->pnfs_error)) {
/* Marks for LAYOUTCOMMIT */
- mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg),
- wdata->args.offset, wdata->args.count);
+ mark_extents_written(BLK_LSEG2EXT(hdr->lseg),
+ hdr->args.offset, hdr->args.count);
}
- pnfs_ld_write_done(wdata);
+ pnfs_ld_write_done(hdr);
}
/* Called when last of bios associated with a bl_write_pagelist call finishes */
static void bl_end_par_io_write(void *data, int num_se)
{
- struct nfs_pgio_data *wdata = data;
+ struct nfs_pgio_header *hdr = data;
- if (unlikely(wdata->header->pnfs_error)) {
- bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval,
+ if (unlikely(hdr->pnfs_error)) {
+ bl_free_short_extents(&BLK_LSEG2EXT(hdr->lseg)->bl_inval,
num_se);
}
- wdata->task.tk_status = wdata->header->pnfs_error;
- wdata->verf.committed = NFS_FILE_SYNC;
- INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
- schedule_work(&wdata->task.u.tk_work);
+ hdr->task.tk_status = hdr->pnfs_error;
+ hdr->verf.committed = NFS_FILE_SYNC;
+ INIT_WORK(&hdr->task.u.tk_work, bl_write_cleanup);
+ schedule_work(&hdr->task.u.tk_work);
}
/* FIXME STUB - mark intersection of layout and page as bad, so is not
@@ -673,18 +672,17 @@ check_page:
}
static enum pnfs_try_status
-bl_write_pagelist(struct nfs_pgio_data *wdata, int sync)
+bl_write_pagelist(struct nfs_pgio_header *header, int sync)
{
- struct nfs_pgio_header *header = wdata->header;
int i, ret, npg_zero, pg_index, last = 0;
struct bio *bio = NULL;
struct pnfs_block_extent *be = NULL, *cow_read = NULL;
sector_t isect, last_isect = 0, extent_length = 0;
struct parallel_io *par = NULL;
- loff_t offset = wdata->args.offset;
- size_t count = wdata->args.count;
+ loff_t offset = header->args.offset;
+ size_t count = header->args.count;
unsigned int pg_offset, pg_len, saved_len;
- struct page **pages = wdata->args.pages;
+ struct page **pages = header->args.pages;
struct page *page;
pgoff_t index;
u64 temp;
@@ -699,11 +697,11 @@ bl_write_pagelist(struct nfs_pgio_data *wdata, int sync)
dprintk("pnfsblock nonblock aligned DIO writes. Resend MDS\n");
goto out_mds;
}
- /* At this point, wdata->pages is a (sequential) list of nfs_pages.
+ /* At this point, header->page_aray is a (sequential) list of nfs_pages.
* We want to write each, and if there is an error set pnfs_error
* to have it redone using nfs.
*/
- par = alloc_parallel(wdata);
+ par = alloc_parallel(header);
if (!par)
goto out_mds;
par->pnfs_callback = bl_end_par_io_write;
@@ -790,8 +788,8 @@ next_page:
bio = bl_submit_bio(WRITE, bio);
/* Middle pages */
- pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT;
- for (i = pg_index; i < wdata->pages.npages; i++) {
+ pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT;
+ for (i = pg_index; i < header->page_array.npages; i++) {
if (!extent_length) {
/* We've used up the previous extent */
bl_put_extent(be);
@@ -862,7 +860,8 @@ next_page:
}
- bio = do_add_page_to_bio(bio, wdata->pages.npages - i, WRITE,
+ bio = do_add_page_to_bio(bio, header->page_array.npages - i,
+ WRITE,
isect, pages[i], be,
bl_end_io_write, par,
pg_offset, pg_len);
@@ -890,7 +889,7 @@ next_page:
}
write_done:
- wdata->res.count = wdata->args.count;
+ header->res.count = header->args.count;
out:
bl_put_extent(be);
bl_put_extent(cow_read);
@@ -1063,7 +1062,7 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
return ERR_PTR(-ENOMEM);
}
- pages = kzalloc(max_pages * sizeof(struct page *), GFP_NOFS);
+ pages = kcalloc(max_pages, sizeof(struct page *), GFP_NOFS);
if (pages == NULL) {
kfree(dev);
return ERR_PTR(-ENOMEM);
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 073b4cf67ed9..54de482143cc 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -428,6 +428,18 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp)
if (p == NULL)
return 0;
+ /*
+ * Did we get the acceptor from userland during the SETCLIENID
+ * negotiation?
+ */
+ if (clp->cl_acceptor)
+ return !strcmp(p, clp->cl_acceptor);
+
+ /*
+ * Otherwise try to verify it using the cl_hostname. Note that this
+ * doesn't work if a non-canonical hostname was used in the devname.
+ */
+
/* Expect a GSS_C_NT_HOSTBASED_NAME like "nfs@serverhostname" */
if (memcmp(p, "nfs@", 4) != 0)
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 1d09289c8f0e..6a4f3666e273 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -110,8 +110,8 @@ struct nfs_subversion *get_nfs_version(unsigned int version)
mutex_unlock(&nfs_version_mutex);
}
- if (!IS_ERR(nfs))
- try_module_get(nfs->owner);
+ if (!IS_ERR(nfs) && !try_module_get(nfs->owner))
+ return ERR_PTR(-EAGAIN);
return nfs;
}
@@ -158,7 +158,8 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
goto error_0;
clp->cl_nfs_mod = cl_init->nfs_mod;
- try_module_get(clp->cl_nfs_mod->owner);
+ if (!try_module_get(clp->cl_nfs_mod->owner))
+ goto error_dealloc;
clp->rpc_ops = clp->cl_nfs_mod->rpc_ops;
@@ -190,6 +191,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
error_cleanup:
put_nfs_version(clp->cl_nfs_mod);
+error_dealloc:
kfree(clp);
error_0:
return ERR_PTR(err);
@@ -252,6 +254,7 @@ void nfs_free_client(struct nfs_client *clp)
put_net(clp->cl_net);
put_nfs_version(clp->cl_nfs_mod);
kfree(clp->cl_hostname);
+ kfree(clp->cl_acceptor);
kfree(clp);
dprintk("<-- nfs_free_client()\n");
@@ -482,8 +485,13 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id);
const struct nfs_rpc_ops *rpc_ops = cl_init->nfs_mod->rpc_ops;
+ if (cl_init->hostname == NULL) {
+ WARN_ON(1);
+ return NULL;
+ }
+
dprintk("--> nfs_get_client(%s,v%u)\n",
- cl_init->hostname ?: "", rpc_ops->version);
+ cl_init->hostname, rpc_ops->version);
/* see if the client already exists */
do {
@@ -510,7 +518,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
} while (!IS_ERR(new));
dprintk("<-- nfs_get_client() Failed to find %s (%ld)\n",
- cl_init->hostname ?: "", PTR_ERR(new));
+ cl_init->hostname, PTR_ERR(new));
return new;
}
EXPORT_SYMBOL_GPL(nfs_get_client);
@@ -1205,7 +1213,7 @@ static const struct file_operations nfs_server_list_fops = {
.open = nfs_server_list_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = seq_release_net,
.owner = THIS_MODULE,
};
@@ -1226,7 +1234,7 @@ static const struct file_operations nfs_volume_list_fops = {
.open = nfs_volume_list_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = seq_release_net,
.owner = THIS_MODULE,
};
@@ -1236,19 +1244,8 @@ static const struct file_operations nfs_volume_list_fops = {
*/
static int nfs_server_list_open(struct inode *inode, struct file *file)
{
- struct seq_file *m;
- int ret;
- struct pid_namespace *pid_ns = file->f_dentry->d_sb->s_fs_info;
- struct net *net = pid_ns->child_reaper->nsproxy->net_ns;
-
- ret = seq_open(file, &nfs_server_list_ops);
- if (ret < 0)
- return ret;
-
- m = file->private_data;
- m->private = net;
-
- return 0;
+ return seq_open_net(inode, file, &nfs_server_list_ops,
+ sizeof(struct seq_net_private));
}
/*
@@ -1256,7 +1253,7 @@ static int nfs_server_list_open(struct inode *inode, struct file *file)
*/
static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos)
{
- struct nfs_net *nn = net_generic(m->private, nfs_net_id);
+ struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id);
/* lock the list against modification */
spin_lock(&nn->nfs_client_lock);
@@ -1268,7 +1265,7 @@ static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos)
*/
static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos)
{
- struct nfs_net *nn = net_generic(p->private, nfs_net_id);
+ struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id);
return seq_list_next(v, &nn->nfs_client_list, pos);
}
@@ -1278,7 +1275,7 @@ static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos)
*/
static void nfs_server_list_stop(struct seq_file *p, void *v)
{
- struct nfs_net *nn = net_generic(p->private, nfs_net_id);
+ struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id);
spin_unlock(&nn->nfs_client_lock);
}
@@ -1289,7 +1286,7 @@ static void nfs_server_list_stop(struct seq_file *p, void *v)
static int nfs_server_list_show(struct seq_file *m, void *v)
{
struct nfs_client *clp;
- struct nfs_net *nn = net_generic(m->private, nfs_net_id);
+ struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id);
/* display header on line 1 */
if (v == &nn->nfs_client_list) {
@@ -1321,19 +1318,8 @@ static int nfs_server_list_show(struct seq_file *m, void *v)
*/
static int nfs_volume_list_open(struct inode *inode, struct file *file)
{
- struct seq_file *m;
- int ret;
- struct pid_namespace *pid_ns = file->f_dentry->d_sb->s_fs_info;
- struct net *net = pid_ns->child_reaper->nsproxy->net_ns;
-
- ret = seq_open(file, &nfs_volume_list_ops);
- if (ret < 0)
- return ret;
-
- m = file->private_data;
- m->private = net;
-
- return 0;
+ return seq_open_net(inode, file, &nfs_server_list_ops,
+ sizeof(struct seq_net_private));
}
/*
@@ -1341,7 +1327,7 @@ static int nfs_volume_list_open(struct inode *inode, struct file *file)
*/
static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos)
{
- struct nfs_net *nn = net_generic(m->private, nfs_net_id);
+ struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id);
/* lock the list against modification */
spin_lock(&nn->nfs_client_lock);
@@ -1353,7 +1339,7 @@ static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos)
*/
static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos)
{
- struct nfs_net *nn = net_generic(p->private, nfs_net_id);
+ struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id);
return seq_list_next(v, &nn->nfs_volume_list, pos);
}
@@ -1363,7 +1349,7 @@ static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos)
*/
static void nfs_volume_list_stop(struct seq_file *p, void *v)
{
- struct nfs_net *nn = net_generic(p->private, nfs_net_id);
+ struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id);
spin_unlock(&nn->nfs_client_lock);
}
@@ -1376,7 +1362,7 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
struct nfs_server *server;
struct nfs_client *clp;
char dev[8], fsid[17];
- struct nfs_net *nn = net_generic(m->private, nfs_net_id);
+ struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id);
/* display header on line 1 */
if (v == &nn->nfs_volume_list) {
@@ -1407,6 +1393,39 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
return 0;
}
+int nfs_fs_proc_net_init(struct net *net)
+{
+ struct nfs_net *nn = net_generic(net, nfs_net_id);
+ struct proc_dir_entry *p;
+
+ nn->proc_nfsfs = proc_net_mkdir(net, "nfsfs", net->proc_net);
+ if (!nn->proc_nfsfs)
+ goto error_0;
+
+ /* a file of servers with which we're dealing */
+ p = proc_create("servers", S_IFREG|S_IRUGO,
+ nn->proc_nfsfs, &nfs_server_list_fops);
+ if (!p)
+ goto error_1;
+
+ /* a file of volumes that we have mounted */
+ p = proc_create("volumes", S_IFREG|S_IRUGO,
+ nn->proc_nfsfs, &nfs_volume_list_fops);
+ if (!p)
+ goto error_1;
+ return 0;
+
+error_1:
+ remove_proc_subtree("nfsfs", net->proc_net);
+error_0:
+ return -ENOMEM;
+}
+
+void nfs_fs_proc_net_exit(struct net *net)
+{
+ remove_proc_subtree("nfsfs", net->proc_net);
+}
+
/*
* initialise the /proc/fs/nfsfs/ directory
*/
@@ -1419,14 +1438,12 @@ int __init nfs_fs_proc_init(void)
goto error_0;
/* a file of servers with which we're dealing */
- p = proc_create("servers", S_IFREG|S_IRUGO,
- proc_fs_nfs, &nfs_server_list_fops);
+ p = proc_symlink("servers", proc_fs_nfs, "../../net/nfsfs/servers");
if (!p)
goto error_1;
/* a file of volumes that we have mounted */
- p = proc_create("volumes", S_IFREG|S_IRUGO,
- proc_fs_nfs, &nfs_volume_list_fops);
+ p = proc_symlink("volumes", proc_fs_nfs, "../../net/nfsfs/volumes");
if (!p)
goto error_2;
return 0;
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 5d8ccecf5f5c..5853f53db732 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -41,14 +41,8 @@ void nfs_mark_delegation_referenced(struct nfs_delegation *delegation)
set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags);
}
-/**
- * nfs_have_delegation - check if inode has a delegation
- * @inode: inode to check
- * @flags: delegation types to check for
- *
- * Returns one if inode has the indicated delegation, otherwise zero.
- */
-int nfs4_have_delegation(struct inode *inode, fmode_t flags)
+static int
+nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark)
{
struct nfs_delegation *delegation;
int ret = 0;
@@ -58,12 +52,34 @@ int nfs4_have_delegation(struct inode *inode, fmode_t flags)
delegation = rcu_dereference(NFS_I(inode)->delegation);
if (delegation != NULL && (delegation->type & flags) == flags &&
!test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) {
- nfs_mark_delegation_referenced(delegation);
+ if (mark)
+ nfs_mark_delegation_referenced(delegation);
ret = 1;
}
rcu_read_unlock();
return ret;
}
+/**
+ * nfs_have_delegation - check if inode has a delegation, mark it
+ * NFS_DELEGATION_REFERENCED if there is one.
+ * @inode: inode to check
+ * @flags: delegation types to check for
+ *
+ * Returns one if inode has the indicated delegation, otherwise zero.
+ */
+int nfs4_have_delegation(struct inode *inode, fmode_t flags)
+{
+ return nfs4_do_check_delegation(inode, flags, true);
+}
+
+/*
+ * nfs4_check_delegation - check if inode has a delegation, do not mark
+ * NFS_DELEGATION_REFERENCED if it has one.
+ */
+int nfs4_check_delegation(struct inode *inode, fmode_t flags)
+{
+ return nfs4_do_check_delegation(inode, flags, false);
+}
static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
{
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 9a79c7a99d6d..5c1cce39297f 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -59,6 +59,7 @@ bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_
void nfs_mark_delegation_referenced(struct nfs_delegation *delegation);
int nfs4_have_delegation(struct inode *inode, fmode_t flags);
+int nfs4_check_delegation(struct inode *inode, fmode_t flags);
#endif
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 4a3d4ef76127..36d921f0c602 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -988,9 +988,13 @@ EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate);
* A check for whether or not the parent directory has changed.
* In the case it has, we assume that the dentries are untrustworthy
* and may need to be looked up again.
+ * If rcu_walk prevents us from performing a full check, return 0.
*/
-static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
+static int nfs_check_verifier(struct inode *dir, struct dentry *dentry,
+ int rcu_walk)
{
+ int ret;
+
if (IS_ROOT(dentry))
return 1;
if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE)
@@ -998,7 +1002,11 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
if (!nfs_verify_change_attribute(dir, dentry->d_time))
return 0;
/* Revalidate nfsi->cache_change_attribute before we declare a match */
- if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0)
+ if (rcu_walk)
+ ret = nfs_revalidate_inode_rcu(NFS_SERVER(dir), dir);
+ else
+ ret = nfs_revalidate_inode(NFS_SERVER(dir), dir);
+ if (ret < 0)
return 0;
if (!nfs_verify_change_attribute(dir, dentry->d_time))
return 0;
@@ -1042,6 +1050,8 @@ int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags)
out:
return (inode->i_nlink == 0) ? -ENOENT : 0;
out_force:
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
ret = __nfs_revalidate_inode(server, inode);
if (ret != 0)
return ret;
@@ -1054,6 +1064,9 @@ out_force:
*
* If parent mtime has changed, we revalidate, else we wait for a
* period corresponding to the parent's attribute cache timeout value.
+ *
+ * If LOOKUP_RCU prevents us from performing a full check, return 1
+ * suggesting a reval is needed.
*/
static inline
int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
@@ -1064,7 +1077,7 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
return 0;
if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG)
return 1;
- return !nfs_check_verifier(dir, dentry);
+ return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU);
}
/*
@@ -1088,21 +1101,30 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
struct nfs4_label *label = NULL;
int error;
- if (flags & LOOKUP_RCU)
- return -ECHILD;
-
- parent = dget_parent(dentry);
- dir = parent->d_inode;
+ if (flags & LOOKUP_RCU) {
+ parent = ACCESS_ONCE(dentry->d_parent);
+ dir = ACCESS_ONCE(parent->d_inode);
+ if (!dir)
+ return -ECHILD;
+ } else {
+ parent = dget_parent(dentry);
+ dir = parent->d_inode;
+ }
nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
inode = dentry->d_inode;
if (!inode) {
- if (nfs_neg_need_reval(dir, dentry, flags))
+ if (nfs_neg_need_reval(dir, dentry, flags)) {
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
goto out_bad;
+ }
goto out_valid_noent;
}
if (is_bad_inode(inode)) {
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
__func__, dentry);
goto out_bad;
@@ -1112,12 +1134,20 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
goto out_set_verifier;
/* Force a full look up iff the parent directory has changed */
- if (!nfs_is_exclusive_create(dir, flags) && nfs_check_verifier(dir, dentry)) {
- if (nfs_lookup_verify_inode(inode, flags))
+ if (!nfs_is_exclusive_create(dir, flags) &&
+ nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) {
+
+ if (nfs_lookup_verify_inode(inode, flags)) {
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
goto out_zap_parent;
+ }
goto out_valid;
}
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
+
if (NFS_STALE(inode))
goto out_bad;
@@ -1153,13 +1183,18 @@ out_set_verifier:
/* Success: notify readdir to use READDIRPLUS */
nfs_advise_use_readdirplus(dir);
out_valid_noent:
- dput(parent);
+ if (flags & LOOKUP_RCU) {
+ if (parent != ACCESS_ONCE(dentry->d_parent))
+ return -ECHILD;
+ } else
+ dput(parent);
dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n",
__func__, dentry);
return 1;
out_zap_parent:
nfs_zap_caches(dir);
out_bad:
+ WARN_ON(flags & LOOKUP_RCU);
nfs_free_fattr(fattr);
nfs_free_fhandle(fhandle);
nfs4_label_free(label);
@@ -1185,6 +1220,7 @@ out_zap_parent:
__func__, dentry);
return 0;
out_error:
+ WARN_ON(flags & LOOKUP_RCU);
nfs_free_fattr(fattr);
nfs_free_fhandle(fhandle);
nfs4_label_free(label);
@@ -1529,14 +1565,9 @@ EXPORT_SYMBOL_GPL(nfs_atomic_open);
static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
{
- struct dentry *parent = NULL;
struct inode *inode;
- struct inode *dir;
int ret = 0;
- if (flags & LOOKUP_RCU)
- return -ECHILD;
-
if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY))
goto no_open;
if (d_mountpoint(dentry))
@@ -1545,34 +1576,47 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
goto no_open;
inode = dentry->d_inode;
- parent = dget_parent(dentry);
- dir = parent->d_inode;
/* We can't create new files in nfs_open_revalidate(), so we
* optimize away revalidation of negative dentries.
*/
if (inode == NULL) {
+ struct dentry *parent;
+ struct inode *dir;
+
+ if (flags & LOOKUP_RCU) {
+ parent = ACCESS_ONCE(dentry->d_parent);
+ dir = ACCESS_ONCE(parent->d_inode);
+ if (!dir)
+ return -ECHILD;
+ } else {
+ parent = dget_parent(dentry);
+ dir = parent->d_inode;
+ }
if (!nfs_neg_need_reval(dir, dentry, flags))
ret = 1;
+ else if (flags & LOOKUP_RCU)
+ ret = -ECHILD;
+ if (!(flags & LOOKUP_RCU))
+ dput(parent);
+ else if (parent != ACCESS_ONCE(dentry->d_parent))
+ return -ECHILD;
goto out;
}
/* NFS only supports OPEN on regular files */
if (!S_ISREG(inode->i_mode))
- goto no_open_dput;
+ goto no_open;
/* We cannot do exclusive creation on a positive dentry */
if (flags & LOOKUP_EXCL)
- goto no_open_dput;
+ goto no_open;
/* Let f_op->open() actually open (and revalidate) the file */
ret = 1;
out:
- dput(parent);
return ret;
-no_open_dput:
- dput(parent);
no_open:
return nfs_lookup_revalidate(dentry, flags);
}
@@ -2028,10 +2072,14 @@ static DEFINE_SPINLOCK(nfs_access_lru_lock);
static LIST_HEAD(nfs_access_lru_list);
static atomic_long_t nfs_access_nr_entries;
+static unsigned long nfs_access_max_cachesize = ULONG_MAX;
+module_param(nfs_access_max_cachesize, ulong, 0644);
+MODULE_PARM_DESC(nfs_access_max_cachesize, "NFS access maximum total cache length");
+
static void nfs_access_free_entry(struct nfs_access_entry *entry)
{
put_rpccred(entry->cred);
- kfree(entry);
+ kfree_rcu(entry, rcu_head);
smp_mb__before_atomic();
atomic_long_dec(&nfs_access_nr_entries);
smp_mb__after_atomic();
@@ -2048,19 +2096,14 @@ static void nfs_access_free_list(struct list_head *head)
}
}
-unsigned long
-nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
+static unsigned long
+nfs_do_access_cache_scan(unsigned int nr_to_scan)
{
LIST_HEAD(head);
struct nfs_inode *nfsi, *next;
struct nfs_access_entry *cache;
- int nr_to_scan = sc->nr_to_scan;
- gfp_t gfp_mask = sc->gfp_mask;
long freed = 0;
- if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
- return SHRINK_STOP;
-
spin_lock(&nfs_access_lru_lock);
list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) {
struct inode *inode;
@@ -2094,11 +2137,39 @@ remove_lru_entry:
}
unsigned long
+nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+ int nr_to_scan = sc->nr_to_scan;
+ gfp_t gfp_mask = sc->gfp_mask;
+
+ if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
+ return SHRINK_STOP;
+ return nfs_do_access_cache_scan(nr_to_scan);
+}
+
+
+unsigned long
nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc)
{
return vfs_pressure_ratio(atomic_long_read(&nfs_access_nr_entries));
}
+static void
+nfs_access_cache_enforce_limit(void)
+{
+ long nr_entries = atomic_long_read(&nfs_access_nr_entries);
+ unsigned long diff;
+ unsigned int nr_to_scan;
+
+ if (nr_entries < 0 || nr_entries <= nfs_access_max_cachesize)
+ return;
+ nr_to_scan = 100;
+ diff = nr_entries - nfs_access_max_cachesize;
+ if (diff < nr_to_scan)
+ nr_to_scan = diff;
+ nfs_do_access_cache_scan(nr_to_scan);
+}
+
static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head)
{
struct rb_root *root_node = &nfsi->access_cache;
@@ -2186,6 +2257,38 @@ out_zap:
return -ENOENT;
}
+static int nfs_access_get_cached_rcu(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
+{
+ /* Only check the most recently returned cache entry,
+ * but do it without locking.
+ */
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs_access_entry *cache;
+ int err = -ECHILD;
+ struct list_head *lh;
+
+ rcu_read_lock();
+ if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
+ goto out;
+ lh = rcu_dereference(nfsi->access_cache_entry_lru.prev);
+ cache = list_entry(lh, struct nfs_access_entry, lru);
+ if (lh == &nfsi->access_cache_entry_lru ||
+ cred != cache->cred)
+ cache = NULL;
+ if (cache == NULL)
+ goto out;
+ if (!nfs_have_delegated_attributes(inode) &&
+ !time_in_range_open(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo))
+ goto out;
+ res->jiffies = cache->jiffies;
+ res->cred = cache->cred;
+ res->mask = cache->mask;
+ err = 0;
+out:
+ rcu_read_unlock();
+ return err;
+}
+
static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set)
{
struct nfs_inode *nfsi = NFS_I(inode);
@@ -2229,6 +2332,11 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
cache->cred = get_rpccred(set->cred);
cache->mask = set->mask;
+ /* The above field assignments must be visible
+ * before this item appears on the lru. We cannot easily
+ * use rcu_assign_pointer, so just force the memory barrier.
+ */
+ smp_wmb();
nfs_access_add_rbtree(inode, cache);
/* Update accounting */
@@ -2244,6 +2352,7 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
&nfs_access_lru_list);
spin_unlock(&nfs_access_lru_lock);
}
+ nfs_access_cache_enforce_limit();
}
EXPORT_SYMBOL_GPL(nfs_access_add_cache);
@@ -2267,10 +2376,16 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
trace_nfs_access_enter(inode);
- status = nfs_access_get_cached(inode, cred, &cache);
+ status = nfs_access_get_cached_rcu(inode, cred, &cache);
+ if (status != 0)
+ status = nfs_access_get_cached(inode, cred, &cache);
if (status == 0)
goto out_cached;
+ status = -ECHILD;
+ if (mask & MAY_NOT_BLOCK)
+ goto out;
+
/* Be clever: ask server to check for all possible rights */
cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ;
cache.cred = cred;
@@ -2321,9 +2436,6 @@ int nfs_permission(struct inode *inode, int mask)
struct rpc_cred *cred;
int res = 0;
- if (mask & MAY_NOT_BLOCK)
- return -ECHILD;
-
nfs_inc_stats(inode, NFSIOS_VFSACCESS);
if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
@@ -2350,12 +2462,23 @@ force_lookup:
if (!NFS_PROTO(inode)->access)
goto out_notsup;
- cred = rpc_lookup_cred();
- if (!IS_ERR(cred)) {
- res = nfs_do_access(inode, cred, mask);
- put_rpccred(cred);
- } else
+ /* Always try fast lookups first */
+ rcu_read_lock();
+ cred = rpc_lookup_cred_nonblock();
+ if (!IS_ERR(cred))
+ res = nfs_do_access(inode, cred, mask|MAY_NOT_BLOCK);
+ else
res = PTR_ERR(cred);
+ rcu_read_unlock();
+ if (res == -ECHILD && !(mask & MAY_NOT_BLOCK)) {
+ /* Fast lookup failed, try the slow way */
+ cred = rpc_lookup_cred();
+ if (!IS_ERR(cred)) {
+ res = nfs_do_access(inode, cred, mask);
+ put_rpccred(cred);
+ } else
+ res = PTR_ERR(cred);
+ }
out:
if (!res && (mask & MAY_EXEC) && !execute_ok(inode))
res = -EACCES;
@@ -2364,6 +2487,9 @@ out:
inode->i_sb->s_id, inode->i_ino, mask, res);
return res;
out_notsup:
+ if (mask & MAY_NOT_BLOCK)
+ return -ECHILD;
+
res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
if (res == 0)
res = generic_permission(inode, mask);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index f11b9eed0de1..65ef6e00deee 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -148,8 +148,8 @@ static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq,
{
struct nfs_writeverf *verfp;
- verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp,
- hdr->data->ds_idx);
+ verfp = nfs_direct_select_verf(dreq, hdr->ds_clp,
+ hdr->ds_idx);
WARN_ON_ONCE(verfp->committed >= 0);
memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
WARN_ON_ONCE(verfp->committed < 0);
@@ -169,8 +169,8 @@ static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq,
{
struct nfs_writeverf *verfp;
- verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp,
- hdr->data->ds_idx);
+ verfp = nfs_direct_select_verf(dreq, hdr->ds_clp,
+ hdr->ds_idx);
if (verfp->committed < 0) {
nfs_direct_set_hdr_verf(dreq, hdr);
return 0;
@@ -715,7 +715,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
{
struct nfs_direct_req *dreq = hdr->dreq;
struct nfs_commit_info cinfo;
- int bit = -1;
+ bool request_commit = false;
struct nfs_page *req = nfs_list_entry(hdr->pages.next);
if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
@@ -729,27 +729,20 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
dreq->flags = 0;
dreq->error = hdr->error;
}
- if (dreq->error != 0)
- bit = NFS_IOHDR_ERROR;
- else {
+ if (dreq->error == 0) {
dreq->count += hdr->good_bytes;
- if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) {
- dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
- bit = NFS_IOHDR_NEED_RESCHED;
- } else if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
+ if (nfs_write_need_commit(hdr)) {
if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
- bit = NFS_IOHDR_NEED_RESCHED;
+ request_commit = true;
else if (dreq->flags == 0) {
nfs_direct_set_hdr_verf(dreq, hdr);
- bit = NFS_IOHDR_NEED_COMMIT;
+ request_commit = true;
dreq->flags = NFS_ODIRECT_DO_COMMIT;
} else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
- if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr)) {
+ request_commit = true;
+ if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr))
dreq->flags =
NFS_ODIRECT_RESCHED_WRITES;
- bit = NFS_IOHDR_NEED_RESCHED;
- } else
- bit = NFS_IOHDR_NEED_COMMIT;
}
}
}
@@ -759,9 +752,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
req = nfs_list_entry(hdr->pages.next);
nfs_list_remove_request(req);
- switch (bit) {
- case NFS_IOHDR_NEED_RESCHED:
- case NFS_IOHDR_NEED_COMMIT:
+ if (request_commit) {
kref_get(&req->wb_kref);
nfs_mark_request_commit(req, hdr->lseg, &cinfo);
}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 4042ff58fe3f..524dd80d1898 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -361,8 +361,8 @@ start:
* Prevent starvation issues if someone is doing a consistency
* sync-to-disk
*/
- ret = wait_on_bit(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING,
- nfs_wait_bit_killable, TASK_KILLABLE);
+ ret = wait_on_bit_action(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING,
+ nfs_wait_bit_killable, TASK_KILLABLE);
if (ret)
return ret;
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index d2eba1c13b7e..90978075f730 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -84,45 +84,37 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
BUG();
}
-static void filelayout_reset_write(struct nfs_pgio_data *data)
+static void filelayout_reset_write(struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = data->header;
- struct rpc_task *task = &data->task;
+ struct rpc_task *task = &hdr->task;
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
dprintk("%s Reset task %5u for i/o through MDS "
"(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
- data->task.tk_pid,
+ hdr->task.tk_pid,
hdr->inode->i_sb->s_id,
(unsigned long long)NFS_FILEID(hdr->inode),
- data->args.count,
- (unsigned long long)data->args.offset);
+ hdr->args.count,
+ (unsigned long long)hdr->args.offset);
- task->tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
- &hdr->pages,
- hdr->completion_ops,
- hdr->dreq);
+ task->tk_status = pnfs_write_done_resend_to_mds(hdr);
}
}
-static void filelayout_reset_read(struct nfs_pgio_data *data)
+static void filelayout_reset_read(struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = data->header;
- struct rpc_task *task = &data->task;
+ struct rpc_task *task = &hdr->task;
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
dprintk("%s Reset task %5u for i/o through MDS "
"(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
- data->task.tk_pid,
+ hdr->task.tk_pid,
hdr->inode->i_sb->s_id,
(unsigned long long)NFS_FILEID(hdr->inode),
- data->args.count,
- (unsigned long long)data->args.offset);
+ hdr->args.count,
+ (unsigned long long)hdr->args.offset);
- task->tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
- &hdr->pages,
- hdr->completion_ops,
- hdr->dreq);
+ task->tk_status = pnfs_read_done_resend_to_mds(hdr);
}
}
@@ -243,18 +235,17 @@ wait_on_recovery:
/* NFS_PROTO call done callback routines */
static int filelayout_read_done_cb(struct rpc_task *task,
- struct nfs_pgio_data *data)
+ struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = data->header;
int err;
- trace_nfs4_pnfs_read(data, task->tk_status);
- err = filelayout_async_handle_error(task, data->args.context->state,
- data->ds_clp, hdr->lseg);
+ trace_nfs4_pnfs_read(hdr, task->tk_status);
+ err = filelayout_async_handle_error(task, hdr->args.context->state,
+ hdr->ds_clp, hdr->lseg);
switch (err) {
case -NFS4ERR_RESET_TO_MDS:
- filelayout_reset_read(data);
+ filelayout_reset_read(hdr);
return task->tk_status;
case -EAGAIN:
rpc_restart_call_prepare(task);
@@ -270,15 +261,14 @@ static int filelayout_read_done_cb(struct rpc_task *task,
* rfc5661 is not clear about which credential should be used.
*/
static void
-filelayout_set_layoutcommit(struct nfs_pgio_data *wdata)
+filelayout_set_layoutcommit(struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = wdata->header;
if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds ||
- wdata->res.verf->committed == NFS_FILE_SYNC)
+ hdr->res.verf->committed == NFS_FILE_SYNC)
return;
- pnfs_set_layoutcommit(wdata);
+ pnfs_set_layoutcommit(hdr);
dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
(unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
}
@@ -305,83 +295,82 @@ filelayout_reset_to_mds(struct pnfs_layout_segment *lseg)
*/
static void filelayout_read_prepare(struct rpc_task *task, void *data)
{
- struct nfs_pgio_data *rdata = data;
+ struct nfs_pgio_header *hdr = data;
- if (unlikely(test_bit(NFS_CONTEXT_BAD, &rdata->args.context->flags))) {
+ if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
rpc_exit(task, -EIO);
return;
}
- if (filelayout_reset_to_mds(rdata->header->lseg)) {
+ if (filelayout_reset_to_mds(hdr->lseg)) {
dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
- filelayout_reset_read(rdata);
+ filelayout_reset_read(hdr);
rpc_exit(task, 0);
return;
}
- rdata->pgio_done_cb = filelayout_read_done_cb;
+ hdr->pgio_done_cb = filelayout_read_done_cb;
- if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
- &rdata->args.seq_args,
- &rdata->res.seq_res,
+ if (nfs41_setup_sequence(hdr->ds_clp->cl_session,
+ &hdr->args.seq_args,
+ &hdr->res.seq_res,
task))
return;
- if (nfs4_set_rw_stateid(&rdata->args.stateid, rdata->args.context,
- rdata->args.lock_context, FMODE_READ) == -EIO)
+ if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
+ hdr->args.lock_context, FMODE_READ) == -EIO)
rpc_exit(task, -EIO); /* lost lock, terminate I/O */
}
static void filelayout_read_call_done(struct rpc_task *task, void *data)
{
- struct nfs_pgio_data *rdata = data;
+ struct nfs_pgio_header *hdr = data;
dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
- if (test_bit(NFS_IOHDR_REDO, &rdata->header->flags) &&
+ if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
task->tk_status == 0) {
- nfs41_sequence_done(task, &rdata->res.seq_res);
+ nfs41_sequence_done(task, &hdr->res.seq_res);
return;
}
/* Note this may cause RPC to be resent */
- rdata->header->mds_ops->rpc_call_done(task, data);
+ hdr->mds_ops->rpc_call_done(task, data);
}
static void filelayout_read_count_stats(struct rpc_task *task, void *data)
{
- struct nfs_pgio_data *rdata = data;
+ struct nfs_pgio_header *hdr = data;
- rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics);
+ rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics);
}
static void filelayout_read_release(void *data)
{
- struct nfs_pgio_data *rdata = data;
- struct pnfs_layout_hdr *lo = rdata->header->lseg->pls_layout;
+ struct nfs_pgio_header *hdr = data;
+ struct pnfs_layout_hdr *lo = hdr->lseg->pls_layout;
filelayout_fenceme(lo->plh_inode, lo);
- nfs_put_client(rdata->ds_clp);
- rdata->header->mds_ops->rpc_release(data);
+ nfs_put_client(hdr->ds_clp);
+ hdr->mds_ops->rpc_release(data);
}
static int filelayout_write_done_cb(struct rpc_task *task,
- struct nfs_pgio_data *data)
+ struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = data->header;
int err;
- trace_nfs4_pnfs_write(data, task->tk_status);
- err = filelayout_async_handle_error(task, data->args.context->state,
- data->ds_clp, hdr->lseg);
+ trace_nfs4_pnfs_write(hdr, task->tk_status);
+ err = filelayout_async_handle_error(task, hdr->args.context->state,
+ hdr->ds_clp, hdr->lseg);
switch (err) {
case -NFS4ERR_RESET_TO_MDS:
- filelayout_reset_write(data);
+ filelayout_reset_write(hdr);
return task->tk_status;
case -EAGAIN:
rpc_restart_call_prepare(task);
return -EAGAIN;
}
- filelayout_set_layoutcommit(data);
+ filelayout_set_layoutcommit(hdr);
return 0;
}
@@ -419,57 +408,57 @@ static int filelayout_commit_done_cb(struct rpc_task *task,
static void filelayout_write_prepare(struct rpc_task *task, void *data)
{
- struct nfs_pgio_data *wdata = data;
+ struct nfs_pgio_header *hdr = data;
- if (unlikely(test_bit(NFS_CONTEXT_BAD, &wdata->args.context->flags))) {
+ if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
rpc_exit(task, -EIO);
return;
}
- if (filelayout_reset_to_mds(wdata->header->lseg)) {
+ if (filelayout_reset_to_mds(hdr->lseg)) {
dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
- filelayout_reset_write(wdata);
+ filelayout_reset_write(hdr);
rpc_exit(task, 0);
return;
}
- if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
- &wdata->args.seq_args,
- &wdata->res.seq_res,
+ if (nfs41_setup_sequence(hdr->ds_clp->cl_session,
+ &hdr->args.seq_args,
+ &hdr->res.seq_res,
task))
return;
- if (nfs4_set_rw_stateid(&wdata->args.stateid, wdata->args.context,
- wdata->args.lock_context, FMODE_WRITE) == -EIO)
+ if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
+ hdr->args.lock_context, FMODE_WRITE) == -EIO)
rpc_exit(task, -EIO); /* lost lock, terminate I/O */
}
static void filelayout_write_call_done(struct rpc_task *task, void *data)
{
- struct nfs_pgio_data *wdata = data;
+ struct nfs_pgio_header *hdr = data;
- if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) &&
+ if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
task->tk_status == 0) {
- nfs41_sequence_done(task, &wdata->res.seq_res);
+ nfs41_sequence_done(task, &hdr->res.seq_res);
return;
}
/* Note this may cause RPC to be resent */
- wdata->header->mds_ops->rpc_call_done(task, data);
+ hdr->mds_ops->rpc_call_done(task, data);
}
static void filelayout_write_count_stats(struct rpc_task *task, void *data)
{
- struct nfs_pgio_data *wdata = data;
+ struct nfs_pgio_header *hdr = data;
- rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics);
+ rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics);
}
static void filelayout_write_release(void *data)
{
- struct nfs_pgio_data *wdata = data;
- struct pnfs_layout_hdr *lo = wdata->header->lseg->pls_layout;
+ struct nfs_pgio_header *hdr = data;
+ struct pnfs_layout_hdr *lo = hdr->lseg->pls_layout;
filelayout_fenceme(lo->plh_inode, lo);
- nfs_put_client(wdata->ds_clp);
- wdata->header->mds_ops->rpc_release(data);
+ nfs_put_client(hdr->ds_clp);
+ hdr->mds_ops->rpc_release(data);
}
static void filelayout_commit_prepare(struct rpc_task *task, void *data)
@@ -529,19 +518,18 @@ static const struct rpc_call_ops filelayout_commit_call_ops = {
};
static enum pnfs_try_status
-filelayout_read_pagelist(struct nfs_pgio_data *data)
+filelayout_read_pagelist(struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = data->header;
struct pnfs_layout_segment *lseg = hdr->lseg;
struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt;
- loff_t offset = data->args.offset;
+ loff_t offset = hdr->args.offset;
u32 j, idx;
struct nfs_fh *fh;
dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n",
__func__, hdr->inode->i_ino,
- data->args.pgbase, (size_t)data->args.count, offset);
+ hdr->args.pgbase, (size_t)hdr->args.count, offset);
/* Retrieve the correct rpc_client for the byte range */
j = nfs4_fl_calc_j_index(lseg, offset);
@@ -559,30 +547,29 @@ filelayout_read_pagelist(struct nfs_pgio_data *data)
/* No multipath support. Use first DS */
atomic_inc(&ds->ds_clp->cl_count);
- data->ds_clp = ds->ds_clp;
- data->ds_idx = idx;
+ hdr->ds_clp = ds->ds_clp;
+ hdr->ds_idx = idx;
fh = nfs4_fl_select_ds_fh(lseg, j);
if (fh)
- data->args.fh = fh;
+ hdr->args.fh = fh;
- data->args.offset = filelayout_get_dserver_offset(lseg, offset);
- data->mds_offset = offset;
+ hdr->args.offset = filelayout_get_dserver_offset(lseg, offset);
+ hdr->mds_offset = offset;
/* Perform an asynchronous read to ds */
- nfs_initiate_pgio(ds_clnt, data,
+ nfs_initiate_pgio(ds_clnt, hdr,
&filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN);
return PNFS_ATTEMPTED;
}
/* Perform async writes. */
static enum pnfs_try_status
-filelayout_write_pagelist(struct nfs_pgio_data *data, int sync)
+filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
{
- struct nfs_pgio_header *hdr = data->header;
struct pnfs_layout_segment *lseg = hdr->lseg;
struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt;
- loff_t offset = data->args.offset;
+ loff_t offset = hdr->args.offset;
u32 j, idx;
struct nfs_fh *fh;
@@ -598,21 +585,20 @@ filelayout_write_pagelist(struct nfs_pgio_data *data, int sync)
return PNFS_NOT_ATTEMPTED;
dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n",
- __func__, hdr->inode->i_ino, sync, (size_t) data->args.count,
+ __func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count,
offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
- data->pgio_done_cb = filelayout_write_done_cb;
+ hdr->pgio_done_cb = filelayout_write_done_cb;
atomic_inc(&ds->ds_clp->cl_count);
- data->ds_clp = ds->ds_clp;
- data->ds_idx = idx;
+ hdr->ds_clp = ds->ds_clp;
+ hdr->ds_idx = idx;
fh = nfs4_fl_select_ds_fh(lseg, j);
if (fh)
- data->args.fh = fh;
-
- data->args.offset = filelayout_get_dserver_offset(lseg, offset);
+ hdr->args.fh = fh;
+ hdr->args.offset = filelayout_get_dserver_offset(lseg, offset);
/* Perform an asynchronous write */
- nfs_initiate_pgio(ds_clnt, data,
+ nfs_initiate_pgio(ds_clnt, hdr,
&filelayout_write_call_ops, sync,
RPC_TASK_SOFTCONN);
return PNFS_ATTEMPTED;
@@ -1023,6 +1009,7 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
/* The generic layer is about to remove the req from the commit list.
* If this will make the bucket empty, it will need to put the lseg reference.
+ * Note this is must be called holding the inode (/cinfo) lock
*/
static void
filelayout_clear_request_commit(struct nfs_page *req,
@@ -1030,7 +1017,6 @@ filelayout_clear_request_commit(struct nfs_page *req,
{
struct pnfs_layout_segment *freeme = NULL;
- spin_lock(cinfo->lock);
if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags))
goto out;
cinfo->ds->nwritten--;
@@ -1045,22 +1031,25 @@ filelayout_clear_request_commit(struct nfs_page *req,
}
out:
nfs_request_remove_commit_list(req, cinfo);
- spin_unlock(cinfo->lock);
- pnfs_put_lseg(freeme);
+ pnfs_put_lseg_async(freeme);
}
-static struct list_head *
-filelayout_choose_commit_list(struct nfs_page *req,
- struct pnfs_layout_segment *lseg,
- struct nfs_commit_info *cinfo)
+static void
+filelayout_mark_request_commit(struct nfs_page *req,
+ struct pnfs_layout_segment *lseg,
+ struct nfs_commit_info *cinfo)
+
{
struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
u32 i, j;
struct list_head *list;
struct pnfs_commit_bucket *buckets;
- if (fl->commit_through_mds)
- return &cinfo->mds->list;
+ if (fl->commit_through_mds) {
+ list = &cinfo->mds->list;
+ spin_lock(cinfo->lock);
+ goto mds_commit;
+ }
/* Note that we are calling nfs4_fl_calc_j_index on each page
* that ends up being committed to a data server. An attractive
@@ -1084,19 +1073,22 @@ filelayout_choose_commit_list(struct nfs_page *req,
}
set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
cinfo->ds->nwritten++;
- spin_unlock(cinfo->lock);
- return list;
-}
-static void
-filelayout_mark_request_commit(struct nfs_page *req,
- struct pnfs_layout_segment *lseg,
- struct nfs_commit_info *cinfo)
-{
- struct list_head *list;
-
- list = filelayout_choose_commit_list(req, lseg, cinfo);
- nfs_request_add_commit_list(req, list, cinfo);
+mds_commit:
+ /* nfs_request_add_commit_list(). We need to add req to list without
+ * dropping cinfo lock.
+ */
+ set_bit(PG_CLEAN, &(req)->wb_flags);
+ nfs_list_add_request(req, list);
+ cinfo->mds->ncommit++;
+ spin_unlock(cinfo->lock);
+ if (!cinfo->dreq) {
+ inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+ inc_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info,
+ BDI_RECLAIMABLE);
+ __mark_inode_dirty(req->wb_context->dentry->d_inode,
+ I_DIRTY_DATASYNC);
+ }
}
static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
@@ -1244,15 +1236,64 @@ restart:
spin_unlock(cinfo->lock);
}
+/* filelayout_search_commit_reqs - Search lists in @cinfo for the head reqest
+ * for @page
+ * @cinfo - commit info for current inode
+ * @page - page to search for matching head request
+ *
+ * Returns a the head request if one is found, otherwise returns NULL.
+ */
+static struct nfs_page *
+filelayout_search_commit_reqs(struct nfs_commit_info *cinfo, struct page *page)
+{
+ struct nfs_page *freq, *t;
+ struct pnfs_commit_bucket *b;
+ int i;
+
+ /* Linearly search the commit lists for each bucket until a matching
+ * request is found */
+ for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
+ list_for_each_entry_safe(freq, t, &b->written, wb_list) {
+ if (freq->wb_page == page)
+ return freq->wb_head;
+ }
+ list_for_each_entry_safe(freq, t, &b->committing, wb_list) {
+ if (freq->wb_page == page)
+ return freq->wb_head;
+ }
+ }
+
+ return NULL;
+}
+
+static void filelayout_retry_commit(struct nfs_commit_info *cinfo, int idx)
+{
+ struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
+ struct pnfs_commit_bucket *bucket;
+ struct pnfs_layout_segment *freeme;
+ int i;
+
+ for (i = idx; i < fl_cinfo->nbuckets; i++) {
+ bucket = &fl_cinfo->buckets[i];
+ if (list_empty(&bucket->committing))
+ continue;
+ nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo);
+ spin_lock(cinfo->lock);
+ freeme = bucket->clseg;
+ bucket->clseg = NULL;
+ spin_unlock(cinfo->lock);
+ pnfs_put_lseg(freeme);
+ }
+}
+
static unsigned int
alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
{
struct pnfs_ds_commit_info *fl_cinfo;
struct pnfs_commit_bucket *bucket;
struct nfs_commit_data *data;
- int i, j;
+ int i;
unsigned int nreq = 0;
- struct pnfs_layout_segment *freeme;
fl_cinfo = cinfo->ds;
bucket = fl_cinfo->buckets;
@@ -1272,16 +1313,7 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
}
/* Clean up on error */
- for (j = i; j < fl_cinfo->nbuckets; j++, bucket++) {
- if (list_empty(&bucket->committing))
- continue;
- nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo);
- spin_lock(cinfo->lock);
- freeme = bucket->clseg;
- bucket->clseg = NULL;
- spin_unlock(cinfo->lock);
- pnfs_put_lseg(freeme);
- }
+ filelayout_retry_commit(cinfo, i);
/* Caller will clean up entries put on list */
return nreq;
}
@@ -1301,8 +1333,12 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
data->lseg = NULL;
list_add(&data->pages, &list);
nreq++;
- } else
+ } else {
nfs_retry_commit(mds_pages, NULL, cinfo);
+ filelayout_retry_commit(cinfo, 0);
+ cinfo->completion_ops->error_cleanup(NFS_I(inode));
+ return -ENOMEM;
+ }
}
nreq += alloc_ds_commits(cinfo, &list);
@@ -1380,6 +1416,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
.clear_request_commit = filelayout_clear_request_commit,
.scan_commit_lists = filelayout_scan_commit_lists,
.recover_commit_reqs = filelayout_recover_commit_reqs,
+ .search_commit_reqs = filelayout_search_commit_reqs,
.commit_pagelist = filelayout_commit_pagelist,
.read_pagelist = filelayout_read_pagelist,
.write_pagelist = filelayout_write_pagelist,
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index 44bf0140a4c7..8540516f4d71 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -695,7 +695,7 @@ filelayout_get_device_info(struct inode *inode,
if (pdev == NULL)
return NULL;
- pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags);
+ pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags);
if (pages == NULL) {
kfree(pdev);
return NULL;
@@ -783,8 +783,8 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds)
{
might_sleep();
- wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING,
- nfs_wait_bit_killable, TASK_KILLABLE);
+ wait_on_bit_action(&ds->ds_state, NFS4DS_CONNECTING,
+ nfs_wait_bit_killable, TASK_KILLABLE);
}
static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds)
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index b94f80420a58..880618a8b048 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -112,7 +112,7 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh,
* if the dentry tree reaches them; however if the dentry already
* exists, we'll pick it up at this point and use it as the root
*/
- ret = d_obtain_alias(inode);
+ ret = d_obtain_root(inode);
if (IS_ERR(ret)) {
dprintk("nfs_get_root: get root dentry failed\n");
goto out;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 9927913c97c2..577a36f0a510 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -75,7 +75,7 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
* nfs_wait_bit_killable - helper for functions that are sleeping on bit locks
* @word: long word containing the bit lock
*/
-int nfs_wait_bit_killable(void *word)
+int nfs_wait_bit_killable(struct wait_bit_key *key)
{
if (fatal_signal_pending(current))
return -ERESTARTSYS;
@@ -1002,6 +1002,15 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
}
EXPORT_SYMBOL_GPL(nfs_revalidate_inode);
+int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *inode)
+{
+ if (!(NFS_I(inode)->cache_validity &
+ (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL))
+ && !nfs_attribute_cache_expired(inode))
+ return NFS_STALE(inode) ? -ESTALE : 0;
+ return -ECHILD;
+}
+
static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping)
{
struct nfs_inode *nfsi = NFS_I(inode);
@@ -1074,8 +1083,8 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
* the bit lock here if it looks like we're going to be doing that.
*/
for (;;) {
- ret = wait_on_bit(bitlock, NFS_INO_INVALIDATING,
- nfs_wait_bit_killable, TASK_KILLABLE);
+ ret = wait_on_bit_action(bitlock, NFS_INO_INVALIDATING,
+ nfs_wait_bit_killable, TASK_KILLABLE);
if (ret)
goto out;
spin_lock(&inode->i_lock);
@@ -1840,11 +1849,12 @@ EXPORT_SYMBOL_GPL(nfs_net_id);
static int nfs_net_init(struct net *net)
{
nfs_clients_init(net);
- return 0;
+ return nfs_fs_proc_net_init(net);
}
static void nfs_net_exit(struct net *net)
{
+ nfs_fs_proc_net_exit(net);
nfs_cleanup_cb_ident_idr(net);
}
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index f415cbf9f6c3..9056622d2230 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -195,7 +195,16 @@ extern struct rpc_clnt *nfs4_find_or_create_ds_client(struct nfs_client *,
#ifdef CONFIG_PROC_FS
extern int __init nfs_fs_proc_init(void);
extern void nfs_fs_proc_exit(void);
+extern int nfs_fs_proc_net_init(struct net *net);
+extern void nfs_fs_proc_net_exit(struct net *net);
#else
+static inline int nfs_fs_proc_net_init(struct net *net)
+{
+ return 0;
+}
+static inline void nfs_fs_proc_net_exit(struct net *net)
+{
+}
static inline int nfs_fs_proc_init(void)
{
return 0;
@@ -238,11 +247,11 @@ void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos);
int nfs_iocounter_wait(struct nfs_io_counter *c);
extern const struct nfs_pageio_ops nfs_pgio_rw_ops;
-struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *);
-void nfs_rw_header_free(struct nfs_pgio_header *);
-void nfs_pgio_data_release(struct nfs_pgio_data *);
+struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *);
+void nfs_pgio_header_free(struct nfs_pgio_header *);
+void nfs_pgio_data_destroy(struct nfs_pgio_header *);
int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *);
-int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *,
+int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_header *,
const struct rpc_call_ops *, int, int);
void nfs_free_request(struct nfs_page *req);
@@ -348,7 +357,7 @@ extern int nfs_drop_inode(struct inode *);
extern void nfs_clear_inode(struct inode *);
extern void nfs_evict_inode(struct inode *);
void nfs_zap_acl_cache(struct inode *inode);
-extern int nfs_wait_bit_killable(void *word);
+extern int nfs_wait_bit_killable(struct wait_bit_key *key);
/* super.c */
extern const struct super_operations nfs_sops;
@@ -442,6 +451,7 @@ int nfs_scan_commit(struct inode *inode, struct list_head *dst,
void nfs_mark_request_commit(struct nfs_page *req,
struct pnfs_layout_segment *lseg,
struct nfs_commit_info *cinfo);
+int nfs_write_need_commit(struct nfs_pgio_header *);
int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
int how, struct nfs_commit_info *cinfo);
void nfs_retry_commit(struct list_head *page_list,
@@ -482,7 +492,7 @@ static inline void nfs_inode_dio_wait(struct inode *inode)
extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq);
/* nfs4proc.c */
-extern void __nfs4_read_done_cb(struct nfs_pgio_data *);
+extern void __nfs4_read_done_cb(struct nfs_pgio_header *);
extern struct nfs_client *nfs4_init_client(struct nfs_client *clp,
const struct rpc_timeout *timeparms,
const char *ip_addr);
diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h
index 8ee1fab83268..ef221fb8a183 100644
--- a/fs/nfs/netns.h
+++ b/fs/nfs/netns.h
@@ -29,6 +29,9 @@ struct nfs_net {
#endif
spinlock_t nfs_client_lock;
struct timespec boot_time;
+#ifdef CONFIG_PROC_FS
+ struct proc_dir_entry *proc_nfsfs;
+#endif
};
extern int nfs_net_id;
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 8f854dde4150..24c6898159cc 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -129,7 +129,10 @@ static int __nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
.rpc_argp = &args,
.rpc_resp = &fattr,
};
- int status;
+ int status = 0;
+
+ if (acl == NULL && (!S_ISDIR(inode->i_mode) || dfacl == NULL))
+ goto out;
status = -EOPNOTSUPP;
if (!nfs_server_capable(inode, NFS_CAP_ACLS))
@@ -256,7 +259,7 @@ nfs3_list_one_acl(struct inode *inode, int type, const char *name, void *data,
char *p = data + *result;
acl = get_acl(inode, type);
- if (!acl)
+ if (IS_ERR_OR_NULL(acl))
return 0;
posix_acl_release(acl);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index f0afa291fd58..809670eba52a 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -795,41 +795,44 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
return status;
}
-static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
{
- struct inode *inode = data->header->inode;
+ struct inode *inode = hdr->inode;
if (nfs3_async_handle_jukebox(task, inode))
return -EAGAIN;
nfs_invalidate_atime(inode);
- nfs_refresh_inode(inode, &data->fattr);
+ nfs_refresh_inode(inode, &hdr->fattr);
return 0;
}
-static void nfs3_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
+static void nfs3_proc_read_setup(struct nfs_pgio_header *hdr,
+ struct rpc_message *msg)
{
msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ];
}
-static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task,
+ struct nfs_pgio_header *hdr)
{
rpc_call_start(task);
return 0;
}
-static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
{
- struct inode *inode = data->header->inode;
+ struct inode *inode = hdr->inode;
if (nfs3_async_handle_jukebox(task, inode))
return -EAGAIN;
if (task->tk_status >= 0)
- nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
+ nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr);
return 0;
}
-static void nfs3_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
+static void nfs3_proc_write_setup(struct nfs_pgio_header *hdr,
+ struct rpc_message *msg)
{
msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE];
}
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index ba2affa51941..a8b855ab4e22 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -54,7 +54,7 @@ struct nfs4_minor_version_ops {
const nfs4_stateid *);
int (*find_root_sec)(struct nfs_server *, struct nfs_fh *,
struct nfs_fsinfo *);
- int (*free_lock_state)(struct nfs_server *,
+ void (*free_lock_state)(struct nfs_server *,
struct nfs4_lock_state *);
const struct rpc_call_ops *call_sync_ops;
const struct nfs4_state_recovery_ops *reboot_recovery_ops;
@@ -129,17 +129,6 @@ enum {
* LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN)
*/
-struct nfs4_lock_owner {
- unsigned int lo_type;
-#define NFS4_ANY_LOCK_TYPE (0U)
-#define NFS4_FLOCK_LOCK_TYPE (1U << 0)
-#define NFS4_POSIX_LOCK_TYPE (1U << 1)
- union {
- fl_owner_t posix_owner;
- pid_t flock_owner;
- } lo_u;
-};
-
struct nfs4_lock_state {
struct list_head ls_locks; /* Other lock stateids */
struct nfs4_state * ls_state; /* Pointer to open state */
@@ -149,7 +138,7 @@ struct nfs4_lock_state {
struct nfs_seqid_counter ls_seqid;
nfs4_stateid ls_stateid;
atomic_t ls_count;
- struct nfs4_lock_owner ls_owner;
+ fl_owner_t ls_owner;
};
/* bits for nfs4_state->flags */
@@ -337,11 +326,11 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode,
*/
static inline void
nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp,
- struct rpc_message *msg, struct nfs_pgio_data *wdata)
+ struct rpc_message *msg, struct nfs_pgio_header *hdr)
{
if (_nfs4_state_protect(clp, NFS_SP4_MACH_CRED_WRITE, clntp, msg) &&
!test_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags))
- wdata->args.stable = NFS_FILE_SYNC;
+ hdr->args.stable = NFS_FILE_SYNC;
}
#else /* CONFIG_NFS_v4_1 */
static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
@@ -369,7 +358,7 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_flags,
static inline void
nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp,
- struct rpc_message *msg, struct nfs_pgio_data *wdata)
+ struct rpc_message *msg, struct nfs_pgio_header *hdr)
{
}
#endif /* CONFIG_NFS_V4_1 */
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index aa9ef4876046..ffdb28d86cf8 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -482,6 +482,16 @@ int nfs40_walk_client_list(struct nfs_client *new,
spin_lock(&nn->nfs_client_lock);
list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) {
+
+ if (pos->rpc_ops != new->rpc_ops)
+ continue;
+
+ if (pos->cl_proto != new->cl_proto)
+ continue;
+
+ if (pos->cl_minorversion != new->cl_minorversion)
+ continue;
+
/* If "pos" isn't marked ready, we can't trust the
* remaining fields in "pos" */
if (pos->cl_cons_state > NFS_CS_READY) {
@@ -501,15 +511,6 @@ int nfs40_walk_client_list(struct nfs_client *new,
if (pos->cl_cons_state != NFS_CS_READY)
continue;
- if (pos->rpc_ops != new->rpc_ops)
- continue;
-
- if (pos->cl_proto != new->cl_proto)
- continue;
-
- if (pos->cl_minorversion != new->cl_minorversion)
- continue;
-
if (pos->cl_clientid != new->cl_clientid)
continue;
@@ -622,6 +623,16 @@ int nfs41_walk_client_list(struct nfs_client *new,
spin_lock(&nn->nfs_client_lock);
list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) {
+
+ if (pos->rpc_ops != new->rpc_ops)
+ continue;
+
+ if (pos->cl_proto != new->cl_proto)
+ continue;
+
+ if (pos->cl_minorversion != new->cl_minorversion)
+ continue;
+
/* If "pos" isn't marked ready, we can't trust the
* remaining fields in "pos", especially the client
* ID and serverowner fields. Wait for CREATE_SESSION
@@ -647,15 +658,6 @@ int nfs41_walk_client_list(struct nfs_client *new,
if (pos->cl_cons_state != NFS_CS_READY)
continue;
- if (pos->rpc_ops != new->rpc_ops)
- continue;
-
- if (pos->cl_proto != new->cl_proto)
- continue;
-
- if (pos->cl_minorversion != new->cl_minorversion)
- continue;
-
if (!nfs4_match_clientids(pos, new))
continue;
@@ -855,6 +857,11 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
};
struct rpc_timeout ds_timeout;
struct nfs_client *clp;
+ char buf[INET6_ADDRSTRLEN + 1];
+
+ if (rpc_ntop(ds_addr, buf, sizeof(buf)) <= 0)
+ return ERR_PTR(-EINVAL);
+ cl_init.hostname = buf;
/*
* Set an authflavor equual to the MDS value. Use the MDS nfs_client
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4bf3d97cc5a0..6ca0c8e7a945 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1952,6 +1952,14 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data)
return status;
}
+/*
+ * Additional permission checks in order to distinguish between an
+ * open for read, and an open for execute. This works around the
+ * fact that NFSv4 OPEN treats read and execute permissions as being
+ * the same.
+ * Note that in the non-execute case, we want to turn off permission
+ * checking if we just created a new file (POSIX open() semantics).
+ */
static int nfs4_opendata_access(struct rpc_cred *cred,
struct nfs4_opendata *opendata,
struct nfs4_state *state, fmode_t fmode,
@@ -1966,14 +1974,14 @@ static int nfs4_opendata_access(struct rpc_cred *cred,
return 0;
mask = 0;
- /* don't check MAY_WRITE - a newly created file may not have
- * write mode bits, but POSIX allows the creating process to write.
- * use openflags to check for exec, because fmode won't
- * always have FMODE_EXEC set when file open for exec. */
+ /*
+ * Use openflags to check for exec, because fmode won't
+ * always have FMODE_EXEC set when file open for exec.
+ */
if (openflags & __FMODE_EXEC) {
/* ONLY check for exec rights */
mask = MAY_EXEC;
- } else if (fmode & FMODE_READ)
+ } else if ((fmode & FMODE_READ) && !opendata->file_created)
mask = MAY_READ;
cache.cred = cred;
@@ -2216,8 +2224,19 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
ret = _nfs4_proc_open(opendata);
- if (ret != 0)
+ if (ret != 0) {
+ if (ret == -ENOENT) {
+ dentry = opendata->dentry;
+ if (dentry->d_inode)
+ d_delete(dentry);
+ else if (d_unhashed(dentry))
+ d_add(dentry, NULL);
+
+ nfs_set_verifier(dentry,
+ nfs_save_change_attribute(opendata->dir->d_inode));
+ }
goto out;
+ }
state = nfs4_opendata_to_nfs4_state(opendata);
ret = PTR_ERR(state);
@@ -2545,6 +2564,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
struct nfs4_closedata *calldata = data;
struct nfs4_state *state = calldata->state;
struct nfs_server *server = NFS_SERVER(calldata->inode);
+ nfs4_stateid *res_stateid = NULL;
dprintk("%s: begin!\n", __func__);
if (!nfs4_sequence_done(task, &calldata->res.seq_res))
@@ -2555,12 +2575,12 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
*/
switch (task->tk_status) {
case 0:
- if (calldata->roc)
+ res_stateid = &calldata->res.stateid;
+ if (calldata->arg.fmode == 0 && calldata->roc)
pnfs_roc_set_barrier(state->inode,
calldata->roc_barrier);
- nfs_clear_open_stateid(state, &calldata->res.stateid, 0);
renew_lease(server, calldata->timestamp);
- goto out_release;
+ break;
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_OLD_STATEID:
@@ -2574,7 +2594,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
goto out_release;
}
}
- nfs_clear_open_stateid(state, NULL, calldata->arg.fmode);
+ nfs_clear_open_stateid(state, res_stateid, calldata->arg.fmode);
out_release:
nfs_release_seqid(calldata->arg.seqid);
nfs_refresh_inode(calldata->inode, calldata->res.fattr);
@@ -2586,6 +2606,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
struct nfs4_closedata *calldata = data;
struct nfs4_state *state = calldata->state;
struct inode *inode = calldata->inode;
+ bool is_rdonly, is_wronly, is_rdwr;
int call_close = 0;
dprintk("%s: begin!\n", __func__);
@@ -2593,21 +2614,27 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
goto out_wait;
task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
- calldata->arg.fmode = FMODE_READ|FMODE_WRITE;
spin_lock(&state->owner->so_lock);
+ is_rdwr = test_bit(NFS_O_RDWR_STATE, &state->flags);
+ is_rdonly = test_bit(NFS_O_RDONLY_STATE, &state->flags);
+ is_wronly = test_bit(NFS_O_WRONLY_STATE, &state->flags);
/* Calculate the change in open mode */
+ calldata->arg.fmode = 0;
if (state->n_rdwr == 0) {
- if (state->n_rdonly == 0) {
- call_close |= test_bit(NFS_O_RDONLY_STATE, &state->flags);
- call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
- calldata->arg.fmode &= ~FMODE_READ;
- }
- if (state->n_wronly == 0) {
- call_close |= test_bit(NFS_O_WRONLY_STATE, &state->flags);
- call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
- calldata->arg.fmode &= ~FMODE_WRITE;
- }
- }
+ if (state->n_rdonly == 0)
+ call_close |= is_rdonly;
+ else if (is_rdonly)
+ calldata->arg.fmode |= FMODE_READ;
+ if (state->n_wronly == 0)
+ call_close |= is_wronly;
+ else if (is_wronly)
+ calldata->arg.fmode |= FMODE_WRITE;
+ } else if (is_rdwr)
+ calldata->arg.fmode |= FMODE_READ|FMODE_WRITE;
+
+ if (calldata->arg.fmode == 0)
+ call_close |= is_rdwr;
+
if (!nfs4_valid_open_stateid(state))
call_close = 0;
spin_unlock(&state->owner->so_lock);
@@ -2647,6 +2674,48 @@ static const struct rpc_call_ops nfs4_close_ops = {
.rpc_release = nfs4_free_closedata,
};
+static bool nfs4_state_has_opener(struct nfs4_state *state)
+{
+ /* first check existing openers */
+ if (test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0 &&
+ state->n_rdonly != 0)
+ return true;
+
+ if (test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0 &&
+ state->n_wronly != 0)
+ return true;
+
+ if (test_bit(NFS_O_RDWR_STATE, &state->flags) != 0 &&
+ state->n_rdwr != 0)
+ return true;
+
+ return false;
+}
+
+static bool nfs4_roc(struct inode *inode)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs_open_context *ctx;
+ struct nfs4_state *state;
+
+ spin_lock(&inode->i_lock);
+ list_for_each_entry(ctx, &nfsi->open_files, list) {
+ state = ctx->state;
+ if (state == NULL)
+ continue;
+ if (nfs4_state_has_opener(state)) {
+ spin_unlock(&inode->i_lock);
+ return false;
+ }
+ }
+ spin_unlock(&inode->i_lock);
+
+ if (nfs4_check_delegation(inode, FMODE_READ))
+ return false;
+
+ return pnfs_roc(inode);
+}
+
/*
* It is possible for data to be read/written from a mem-mapped file
* after the sys_close call (which hits the vfs layer as a flush).
@@ -2697,7 +2766,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait)
calldata->res.fattr = &calldata->fattr;
calldata->res.seqid = calldata->arg.seqid;
calldata->res.server = server;
- calldata->roc = pnfs_roc(state->inode);
+ calldata->roc = nfs4_roc(state->inode);
nfs_sb_active(calldata->inode->i_sb);
msg.rpc_argp = &calldata->arg;
@@ -4033,24 +4102,25 @@ static bool nfs4_error_stateid_expired(int err)
return false;
}
-void __nfs4_read_done_cb(struct nfs_pgio_data *data)
+void __nfs4_read_done_cb(struct nfs_pgio_header *hdr)
{
- nfs_invalidate_atime(data->header->inode);
+ nfs_invalidate_atime(hdr->inode);
}
-static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr)
{
- struct nfs_server *server = NFS_SERVER(data->header->inode);
+ struct nfs_server *server = NFS_SERVER(hdr->inode);
- trace_nfs4_read(data, task->tk_status);
- if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
+ trace_nfs4_read(hdr, task->tk_status);
+ if (nfs4_async_handle_error(task, server,
+ hdr->args.context->state) == -EAGAIN) {
rpc_restart_call_prepare(task);
return -EAGAIN;
}
- __nfs4_read_done_cb(data);
+ __nfs4_read_done_cb(hdr);
if (task->tk_status > 0)
- renew_lease(server, data->timestamp);
+ renew_lease(server, hdr->timestamp);
return 0;
}
@@ -4068,54 +4138,59 @@ static bool nfs4_read_stateid_changed(struct rpc_task *task,
return true;
}
-static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
{
dprintk("--> %s\n", __func__);
- if (!nfs4_sequence_done(task, &data->res.seq_res))
+ if (!nfs4_sequence_done(task, &hdr->res.seq_res))
return -EAGAIN;
- if (nfs4_read_stateid_changed(task, &data->args))
+ if (nfs4_read_stateid_changed(task, &hdr->args))
return -EAGAIN;
- return data->pgio_done_cb ? data->pgio_done_cb(task, data) :
- nfs4_read_done_cb(task, data);
+ return hdr->pgio_done_cb ? hdr->pgio_done_cb(task, hdr) :
+ nfs4_read_done_cb(task, hdr);
}
-static void nfs4_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
+static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr,
+ struct rpc_message *msg)
{
- data->timestamp = jiffies;
- data->pgio_done_cb = nfs4_read_done_cb;
+ hdr->timestamp = jiffies;
+ hdr->pgio_done_cb = nfs4_read_done_cb;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
- nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
+ nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0);
}
-static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task,
+ struct nfs_pgio_header *hdr)
{
- if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
- &data->args.seq_args,
- &data->res.seq_res,
+ if (nfs4_setup_sequence(NFS_SERVER(hdr->inode),
+ &hdr->args.seq_args,
+ &hdr->res.seq_res,
task))
return 0;
- if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context,
- data->args.lock_context, data->header->rw_ops->rw_mode) == -EIO)
+ if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
+ hdr->args.lock_context,
+ hdr->rw_ops->rw_mode) == -EIO)
return -EIO;
- if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags)))
+ if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags)))
return -EIO;
return 0;
}
-static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs4_write_done_cb(struct rpc_task *task,
+ struct nfs_pgio_header *hdr)
{
- struct inode *inode = data->header->inode;
+ struct inode *inode = hdr->inode;
- trace_nfs4_write(data, task->tk_status);
- if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
+ trace_nfs4_write(hdr, task->tk_status);
+ if (nfs4_async_handle_error(task, NFS_SERVER(inode),
+ hdr->args.context->state) == -EAGAIN) {
rpc_restart_call_prepare(task);
return -EAGAIN;
}
if (task->tk_status >= 0) {
- renew_lease(NFS_SERVER(inode), data->timestamp);
- nfs_post_op_update_inode_force_wcc(inode, &data->fattr);
+ renew_lease(NFS_SERVER(inode), hdr->timestamp);
+ nfs_post_op_update_inode_force_wcc(inode, &hdr->fattr);
}
return 0;
}
@@ -4134,23 +4209,21 @@ static bool nfs4_write_stateid_changed(struct rpc_task *task,
return true;
}
-static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
{
- if (!nfs4_sequence_done(task, &data->res.seq_res))
+ if (!nfs4_sequence_done(task, &hdr->res.seq_res))
return -EAGAIN;
- if (nfs4_write_stateid_changed(task, &data->args))
+ if (nfs4_write_stateid_changed(task, &hdr->args))
return -EAGAIN;
- return data->pgio_done_cb ? data->pgio_done_cb(task, data) :
- nfs4_write_done_cb(task, data);
+ return hdr->pgio_done_cb ? hdr->pgio_done_cb(task, hdr) :
+ nfs4_write_done_cb(task, hdr);
}
static
-bool nfs4_write_need_cache_consistency_data(const struct nfs_pgio_data *data)
+bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr)
{
- const struct nfs_pgio_header *hdr = data->header;
-
/* Don't request attributes for pNFS or O_DIRECT writes */
- if (data->ds_clp != NULL || hdr->dreq != NULL)
+ if (hdr->ds_clp != NULL || hdr->dreq != NULL)
return false;
/* Otherwise, request attributes if and only if we don't hold
* a delegation
@@ -4158,23 +4231,24 @@ bool nfs4_write_need_cache_consistency_data(const struct nfs_pgio_data *data)
return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0;
}
-static void nfs4_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
+static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr,
+ struct rpc_message *msg)
{
- struct nfs_server *server = NFS_SERVER(data->header->inode);
+ struct nfs_server *server = NFS_SERVER(hdr->inode);
- if (!nfs4_write_need_cache_consistency_data(data)) {
- data->args.bitmask = NULL;
- data->res.fattr = NULL;
+ if (!nfs4_write_need_cache_consistency_data(hdr)) {
+ hdr->args.bitmask = NULL;
+ hdr->res.fattr = NULL;
} else
- data->args.bitmask = server->cache_consistency_bitmask;
+ hdr->args.bitmask = server->cache_consistency_bitmask;
- if (!data->pgio_done_cb)
- data->pgio_done_cb = nfs4_write_done_cb;
- data->res.server = server;
- data->timestamp = jiffies;
+ if (!hdr->pgio_done_cb)
+ hdr->pgio_done_cb = nfs4_write_done_cb;
+ hdr->res.server = server;
+ hdr->timestamp = jiffies;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE];
- nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
+ nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 1);
}
static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
@@ -4881,6 +4955,18 @@ nfs4_init_callback_netid(const struct nfs_client *clp, char *buf, size_t len)
return scnprintf(buf, len, "tcp");
}
+static void nfs4_setclientid_done(struct rpc_task *task, void *calldata)
+{
+ struct nfs4_setclientid *sc = calldata;
+
+ if (task->tk_status == 0)
+ sc->sc_cred = get_rpccred(task->tk_rqstp->rq_cred);
+}
+
+static const struct rpc_call_ops nfs4_setclientid_ops = {
+ .rpc_call_done = nfs4_setclientid_done,
+};
+
/**
* nfs4_proc_setclientid - Negotiate client ID
* @clp: state data structure
@@ -4907,6 +4993,14 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
.rpc_resp = res,
.rpc_cred = cred,
};
+ struct rpc_task *task;
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = clp->cl_rpcclient,
+ .rpc_message = &msg,
+ .callback_ops = &nfs4_setclientid_ops,
+ .callback_data = &setclientid,
+ .flags = RPC_TASK_TIMEOUT,
+ };
int status;
/* nfs_client_id4 */
@@ -4933,7 +5027,18 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
dprintk("NFS call setclientid auth=%s, '%.*s'\n",
clp->cl_rpcclient->cl_auth->au_ops->au_name,
setclientid.sc_name_len, setclientid.sc_name);
- status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
+ task = rpc_run_task(&task_setup_data);
+ if (IS_ERR(task)) {
+ status = PTR_ERR(task);
+ goto out;
+ }
+ status = task->tk_status;
+ if (setclientid.sc_cred) {
+ clp->cl_acceptor = rpcauth_stringify_acceptor(setclientid.sc_cred);
+ put_rpccred(setclientid.sc_cred);
+ }
+ rpc_put_task(task);
+out:
trace_nfs4_setclientid(clp, status);
dprintk("NFS reply setclientid: %d\n", status);
return status;
@@ -4975,6 +5080,9 @@ struct nfs4_delegreturndata {
unsigned long timestamp;
struct nfs_fattr fattr;
int rpc_status;
+ struct inode *inode;
+ bool roc;
+ u32 roc_barrier;
};
static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
@@ -4988,7 +5096,6 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
switch (task->tk_status) {
case 0:
renew_lease(data->res.server, data->timestamp);
- break;
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_BAD_STATEID:
@@ -4996,6 +5103,8 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
task->tk_status = 0;
+ if (data->roc)
+ pnfs_roc_set_barrier(data->inode, data->roc_barrier);
break;
default:
if (nfs4_async_handle_error(task, data->res.server, NULL) ==
@@ -5009,6 +5118,10 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
static void nfs4_delegreturn_release(void *calldata)
{
+ struct nfs4_delegreturndata *data = calldata;
+
+ if (data->roc)
+ pnfs_roc_release(data->inode);
kfree(calldata);
}
@@ -5018,6 +5131,10 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data)
d_data = (struct nfs4_delegreturndata *)data;
+ if (d_data->roc &&
+ pnfs_roc_drain(d_data->inode, &d_data->roc_barrier, task))
+ return;
+
nfs4_setup_sequence(d_data->res.server,
&d_data->args.seq_args,
&d_data->res.seq_res,
@@ -5061,6 +5178,9 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
nfs_fattr_init(data->res.fattr);
data->timestamp = jiffies;
data->rpc_status = 0;
+ data->inode = inode;
+ data->roc = list_empty(&NFS_I(inode)->open_files) ?
+ pnfs_roc(inode) : false;
task_setup_data.callback_data = data;
msg.rpc_argp = &data->args;
@@ -5834,8 +5954,10 @@ struct nfs_release_lockowner_data {
static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata)
{
struct nfs_release_lockowner_data *data = calldata;
- nfs40_setup_sequence(data->server,
- &data->args.seq_args, &data->res.seq_res, task);
+ struct nfs_server *server = data->server;
+ nfs40_setup_sequence(server, &data->args.seq_args,
+ &data->res.seq_res, task);
+ data->args.lock_owner.clientid = server->nfs_client->cl_clientid;
data->timestamp = jiffies;
}
@@ -5852,6 +5974,8 @@ static void nfs4_release_lockowner_done(struct rpc_task *task, void *calldata)
break;
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_EXPIRED:
+ nfs4_schedule_lease_recovery(server->nfs_client);
+ break;
case -NFS4ERR_LEASE_MOVED:
case -NFS4ERR_DELAY:
if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN)
@@ -5872,7 +5996,8 @@ static const struct rpc_call_ops nfs4_release_lockowner_ops = {
.rpc_release = nfs4_release_lockowner_release,
};
-static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp)
+static void
+nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp)
{
struct nfs_release_lockowner_data *data;
struct rpc_message msg = {
@@ -5880,11 +6005,11 @@ static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_st
};
if (server->nfs_client->cl_mvops->minor_version != 0)
- return -EINVAL;
+ return;
data = kmalloc(sizeof(*data), GFP_NOFS);
if (!data)
- return -ENOMEM;
+ return;
data->lsp = lsp;
data->server = server;
data->args.lock_owner.clientid = server->nfs_client->cl_clientid;
@@ -5895,7 +6020,6 @@ static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_st
msg.rpc_resp = &data->res;
nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data);
- return 0;
}
#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
@@ -8182,7 +8306,8 @@ static int nfs41_free_stateid(struct nfs_server *server,
return ret;
}
-static int nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp)
+static void
+nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp)
{
struct rpc_task *task;
struct rpc_cred *cred = lsp->ls_state->owner->so_cred;
@@ -8190,9 +8315,8 @@ static int nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_sta
task = _nfs41_free_stateid(server, &lsp->ls_stateid, cred, false);
nfs4_free_lock_state(server, lsp);
if (IS_ERR(task))
- return PTR_ERR(task);
+ return;
rpc_put_task(task);
- return 0;
}
static bool nfs41_match_stateid(const nfs4_stateid *s1,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 848f6853c59e..22fe35104c0c 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -787,21 +787,12 @@ void nfs4_close_sync(struct nfs4_state *state, fmode_t fmode)
* that is compatible with current->files
*/
static struct nfs4_lock_state *
-__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type)
+__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
{
struct nfs4_lock_state *pos;
list_for_each_entry(pos, &state->lock_states, ls_locks) {
- if (type != NFS4_ANY_LOCK_TYPE && pos->ls_owner.lo_type != type)
+ if (pos->ls_owner != fl_owner)
continue;
- switch (pos->ls_owner.lo_type) {
- case NFS4_POSIX_LOCK_TYPE:
- if (pos->ls_owner.lo_u.posix_owner != fl_owner)
- continue;
- break;
- case NFS4_FLOCK_LOCK_TYPE:
- if (pos->ls_owner.lo_u.flock_owner != fl_pid)
- continue;
- }
atomic_inc(&pos->ls_count);
return pos;
}
@@ -813,7 +804,7 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_p
* exists, return an uninitialized one.
*
*/
-static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type)
+static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
{
struct nfs4_lock_state *lsp;
struct nfs_server *server = state->owner->so_server;
@@ -824,17 +815,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
nfs4_init_seqid_counter(&lsp->ls_seqid);
atomic_set(&lsp->ls_count, 1);
lsp->ls_state = state;
- lsp->ls_owner.lo_type = type;
- switch (lsp->ls_owner.lo_type) {
- case NFS4_FLOCK_LOCK_TYPE:
- lsp->ls_owner.lo_u.flock_owner = fl_pid;
- break;
- case NFS4_POSIX_LOCK_TYPE:
- lsp->ls_owner.lo_u.posix_owner = fl_owner;
- break;
- default:
- goto out_free;
- }
+ lsp->ls_owner = fl_owner;
lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id, 0, 0, GFP_NOFS);
if (lsp->ls_seqid.owner_id < 0)
goto out_free;
@@ -857,13 +838,13 @@ void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp
* exists, return an uninitialized one.
*
*/
-static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner, pid_t pid, unsigned int type)
+static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner)
{
struct nfs4_lock_state *lsp, *new = NULL;
for(;;) {
spin_lock(&state->state_lock);
- lsp = __nfs4_find_lock_state(state, owner, pid, type);
+ lsp = __nfs4_find_lock_state(state, owner);
if (lsp != NULL)
break;
if (new != NULL) {
@@ -874,7 +855,7 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_
break;
}
spin_unlock(&state->state_lock);
- new = nfs4_alloc_lock_state(state, owner, pid, type);
+ new = nfs4_alloc_lock_state(state, owner);
if (new == NULL)
return NULL;
}
@@ -935,13 +916,7 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
if (fl->fl_ops != NULL)
return 0;
- if (fl->fl_flags & FL_POSIX)
- lsp = nfs4_get_lock_state(state, fl->fl_owner, 0, NFS4_POSIX_LOCK_TYPE);
- else if (fl->fl_flags & FL_FLOCK)
- lsp = nfs4_get_lock_state(state, NULL, fl->fl_pid,
- NFS4_FLOCK_LOCK_TYPE);
- else
- return -EINVAL;
+ lsp = nfs4_get_lock_state(state, fl->fl_owner);
if (lsp == NULL)
return -ENOMEM;
fl->fl_u.nfs4_fl.owner = lsp;
@@ -955,7 +930,6 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst,
{
struct nfs4_lock_state *lsp;
fl_owner_t fl_owner;
- pid_t fl_pid;
int ret = -ENOENT;
@@ -966,9 +940,8 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst,
goto out;
fl_owner = lockowner->l_owner;
- fl_pid = lockowner->l_pid;
spin_lock(&state->state_lock);
- lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE);
+ lsp = __nfs4_find_lock_state(state, fl_owner);
if (lsp && test_bit(NFS_LOCK_LOST, &lsp->ls_flags))
ret = -EIO;
else if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) {
@@ -1251,8 +1224,8 @@ int nfs4_wait_clnt_recover(struct nfs_client *clp)
might_sleep();
atomic_inc(&clp->cl_count);
- res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
- nfs_wait_bit_killable, TASK_KILLABLE);
+ res = wait_on_bit_action(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
+ nfs_wait_bit_killable, TASK_KILLABLE);
if (res)
goto out;
if (clp->cl_cons_state < 0)
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 0a744f3a86f6..1c32adbe728d 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -932,11 +932,11 @@ DEFINE_NFS4_IDMAP_EVENT(nfs4_map_gid_to_group);
DECLARE_EVENT_CLASS(nfs4_read_event,
TP_PROTO(
- const struct nfs_pgio_data *data,
+ const struct nfs_pgio_header *hdr,
int error
),
- TP_ARGS(data, error),
+ TP_ARGS(hdr, error),
TP_STRUCT__entry(
__field(dev_t, dev)
@@ -948,12 +948,12 @@ DECLARE_EVENT_CLASS(nfs4_read_event,
),
TP_fast_assign(
- const struct inode *inode = data->header->inode;
+ const struct inode *inode = hdr->inode;
__entry->dev = inode->i_sb->s_dev;
__entry->fileid = NFS_FILEID(inode);
__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
- __entry->offset = data->args.offset;
- __entry->count = data->args.count;
+ __entry->offset = hdr->args.offset;
+ __entry->count = hdr->args.count;
__entry->error = error;
),
@@ -972,10 +972,10 @@ DECLARE_EVENT_CLASS(nfs4_read_event,
#define DEFINE_NFS4_READ_EVENT(name) \
DEFINE_EVENT(nfs4_read_event, name, \
TP_PROTO( \
- const struct nfs_pgio_data *data, \
+ const struct nfs_pgio_header *hdr, \
int error \
), \
- TP_ARGS(data, error))
+ TP_ARGS(hdr, error))
DEFINE_NFS4_READ_EVENT(nfs4_read);
#ifdef CONFIG_NFS_V4_1
DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read);
@@ -983,11 +983,11 @@ DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read);
DECLARE_EVENT_CLASS(nfs4_write_event,
TP_PROTO(
- const struct nfs_pgio_data *data,
+ const struct nfs_pgio_header *hdr,
int error
),
- TP_ARGS(data, error),
+ TP_ARGS(hdr, error),
TP_STRUCT__entry(
__field(dev_t, dev)
@@ -999,12 +999,12 @@ DECLARE_EVENT_CLASS(nfs4_write_event,
),
TP_fast_assign(
- const struct inode *inode = data->header->inode;
+ const struct inode *inode = hdr->inode;
__entry->dev = inode->i_sb->s_dev;
__entry->fileid = NFS_FILEID(inode);
__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
- __entry->offset = data->args.offset;
- __entry->count = data->args.count;
+ __entry->offset = hdr->args.offset;
+ __entry->count = hdr->args.count;
__entry->error = error;
),
@@ -1024,10 +1024,10 @@ DECLARE_EVENT_CLASS(nfs4_write_event,
#define DEFINE_NFS4_WRITE_EVENT(name) \
DEFINE_EVENT(nfs4_write_event, name, \
TP_PROTO( \
- const struct nfs_pgio_data *data, \
+ const struct nfs_pgio_header *hdr, \
int error \
), \
- TP_ARGS(data, error))
+ TP_ARGS(hdr, error))
DEFINE_NFS4_WRITE_EVENT(nfs4_write);
#ifdef CONFIG_NFS_V4_1
DEFINE_NFS4_WRITE_EVENT(nfs4_pnfs_write);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 939ae606cfa4..e13b59d8d9aa 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -7092,7 +7092,7 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp,
if (!status)
status = decode_sequence(xdr, &res->seq_res, rqstp);
if (!status)
- status = decode_reclaim_complete(xdr, (void *)NULL);
+ status = decode_reclaim_complete(xdr, NULL);
return status;
}
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 611320753db2..ae05278b3761 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -439,22 +439,21 @@ static void _read_done(struct ore_io_state *ios, void *private)
objlayout_read_done(&objios->oir, status, objios->sync);
}
-int objio_read_pagelist(struct nfs_pgio_data *rdata)
+int objio_read_pagelist(struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = rdata->header;
struct objio_state *objios;
int ret;
ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true,
- hdr->lseg, rdata->args.pages, rdata->args.pgbase,
- rdata->args.offset, rdata->args.count, rdata,
+ hdr->lseg, hdr->args.pages, hdr->args.pgbase,
+ hdr->args.offset, hdr->args.count, hdr,
GFP_KERNEL, &objios);
if (unlikely(ret))
return ret;
objios->ios->done = _read_done;
dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
- rdata->args.offset, rdata->args.count);
+ hdr->args.offset, hdr->args.count);
ret = ore_read(objios->ios);
if (unlikely(ret))
objio_free_result(&objios->oir);
@@ -487,11 +486,11 @@ static void _write_done(struct ore_io_state *ios, void *private)
static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
{
struct objio_state *objios = priv;
- struct nfs_pgio_data *wdata = objios->oir.rpcdata;
- struct address_space *mapping = wdata->header->inode->i_mapping;
+ struct nfs_pgio_header *hdr = objios->oir.rpcdata;
+ struct address_space *mapping = hdr->inode->i_mapping;
pgoff_t index = offset / PAGE_SIZE;
struct page *page;
- loff_t i_size = i_size_read(wdata->header->inode);
+ loff_t i_size = i_size_read(hdr->inode);
if (offset >= i_size) {
*uptodate = true;
@@ -531,15 +530,14 @@ static const struct _ore_r4w_op _r4w_op = {
.put_page = &__r4w_put_page,
};
-int objio_write_pagelist(struct nfs_pgio_data *wdata, int how)
+int objio_write_pagelist(struct nfs_pgio_header *hdr, int how)
{
- struct nfs_pgio_header *hdr = wdata->header;
struct objio_state *objios;
int ret;
ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false,
- hdr->lseg, wdata->args.pages, wdata->args.pgbase,
- wdata->args.offset, wdata->args.count, wdata, GFP_NOFS,
+ hdr->lseg, hdr->args.pages, hdr->args.pgbase,
+ hdr->args.offset, hdr->args.count, hdr, GFP_NOFS,
&objios);
if (unlikely(ret))
return ret;
@@ -551,7 +549,7 @@ int objio_write_pagelist(struct nfs_pgio_data *wdata, int how)
objios->ios->done = _write_done;
dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
- wdata->args.offset, wdata->args.count);
+ hdr->args.offset, hdr->args.count);
ret = ore_write(objios->ios);
if (unlikely(ret)) {
objio_free_result(&objios->oir);
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index 765d3f54e986..697a16d11fac 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -229,36 +229,36 @@ objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index,
static void _rpc_read_complete(struct work_struct *work)
{
struct rpc_task *task;
- struct nfs_pgio_data *rdata;
+ struct nfs_pgio_header *hdr;
dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work);
- rdata = container_of(task, struct nfs_pgio_data, task);
+ hdr = container_of(task, struct nfs_pgio_header, task);
- pnfs_ld_read_done(rdata);
+ pnfs_ld_read_done(hdr);
}
void
objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
{
- struct nfs_pgio_data *rdata = oir->rpcdata;
+ struct nfs_pgio_header *hdr = oir->rpcdata;
- oir->status = rdata->task.tk_status = status;
+ oir->status = hdr->task.tk_status = status;
if (status >= 0)
- rdata->res.count = status;
+ hdr->res.count = status;
else
- rdata->header->pnfs_error = status;
+ hdr->pnfs_error = status;
objlayout_iodone(oir);
/* must not use oir after this point */
dprintk("%s: Return status=%zd eof=%d sync=%d\n", __func__,
- status, rdata->res.eof, sync);
+ status, hdr->res.eof, sync);
if (sync)
- pnfs_ld_read_done(rdata);
+ pnfs_ld_read_done(hdr);
else {
- INIT_WORK(&rdata->task.u.tk_work, _rpc_read_complete);
- schedule_work(&rdata->task.u.tk_work);
+ INIT_WORK(&hdr->task.u.tk_work, _rpc_read_complete);
+ schedule_work(&hdr->task.u.tk_work);
}
}
@@ -266,12 +266,11 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
* Perform sync or async reads.
*/
enum pnfs_try_status
-objlayout_read_pagelist(struct nfs_pgio_data *rdata)
+objlayout_read_pagelist(struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = rdata->header;
struct inode *inode = hdr->inode;
- loff_t offset = rdata->args.offset;
- size_t count = rdata->args.count;
+ loff_t offset = hdr->args.offset;
+ size_t count = hdr->args.count;
int err;
loff_t eof;
@@ -279,23 +278,23 @@ objlayout_read_pagelist(struct nfs_pgio_data *rdata)
if (unlikely(offset + count > eof)) {
if (offset >= eof) {
err = 0;
- rdata->res.count = 0;
- rdata->res.eof = 1;
+ hdr->res.count = 0;
+ hdr->res.eof = 1;
/*FIXME: do we need to call pnfs_ld_read_done() */
goto out;
}
count = eof - offset;
}
- rdata->res.eof = (offset + count) >= eof;
- _fix_verify_io_params(hdr->lseg, &rdata->args.pages,
- &rdata->args.pgbase,
- rdata->args.offset, rdata->args.count);
+ hdr->res.eof = (offset + count) >= eof;
+ _fix_verify_io_params(hdr->lseg, &hdr->args.pages,
+ &hdr->args.pgbase,
+ hdr->args.offset, hdr->args.count);
dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n",
- __func__, inode->i_ino, offset, count, rdata->res.eof);
+ __func__, inode->i_ino, offset, count, hdr->res.eof);
- err = objio_read_pagelist(rdata);
+ err = objio_read_pagelist(hdr);
out:
if (unlikely(err)) {
hdr->pnfs_error = err;
@@ -312,38 +311,38 @@ objlayout_read_pagelist(struct nfs_pgio_data *rdata)
static void _rpc_write_complete(struct work_struct *work)
{
struct rpc_task *task;
- struct nfs_pgio_data *wdata;
+ struct nfs_pgio_header *hdr;
dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work);
- wdata = container_of(task, struct nfs_pgio_data, task);
+ hdr = container_of(task, struct nfs_pgio_header, task);
- pnfs_ld_write_done(wdata);
+ pnfs_ld_write_done(hdr);
}
void
objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
{
- struct nfs_pgio_data *wdata = oir->rpcdata;
+ struct nfs_pgio_header *hdr = oir->rpcdata;
- oir->status = wdata->task.tk_status = status;
+ oir->status = hdr->task.tk_status = status;
if (status >= 0) {
- wdata->res.count = status;
- wdata->verf.committed = oir->committed;
+ hdr->res.count = status;
+ hdr->verf.committed = oir->committed;
} else {
- wdata->header->pnfs_error = status;
+ hdr->pnfs_error = status;
}
objlayout_iodone(oir);
/* must not use oir after this point */
dprintk("%s: Return status %zd committed %d sync=%d\n", __func__,
- status, wdata->verf.committed, sync);
+ status, hdr->verf.committed, sync);
if (sync)
- pnfs_ld_write_done(wdata);
+ pnfs_ld_write_done(hdr);
else {
- INIT_WORK(&wdata->task.u.tk_work, _rpc_write_complete);
- schedule_work(&wdata->task.u.tk_work);
+ INIT_WORK(&hdr->task.u.tk_work, _rpc_write_complete);
+ schedule_work(&hdr->task.u.tk_work);
}
}
@@ -351,17 +350,15 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
* Perform sync or async writes.
*/
enum pnfs_try_status
-objlayout_write_pagelist(struct nfs_pgio_data *wdata,
- int how)
+objlayout_write_pagelist(struct nfs_pgio_header *hdr, int how)
{
- struct nfs_pgio_header *hdr = wdata->header;
int err;
- _fix_verify_io_params(hdr->lseg, &wdata->args.pages,
- &wdata->args.pgbase,
- wdata->args.offset, wdata->args.count);
+ _fix_verify_io_params(hdr->lseg, &hdr->args.pages,
+ &hdr->args.pgbase,
+ hdr->args.offset, hdr->args.count);
- err = objio_write_pagelist(wdata, how);
+ err = objio_write_pagelist(hdr, how);
if (unlikely(err)) {
hdr->pnfs_error = err;
dprintk("%s: Returned Error %d\n", __func__, err);
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h
index 01e041029a6c..fd13f1d2f136 100644
--- a/fs/nfs/objlayout/objlayout.h
+++ b/fs/nfs/objlayout/objlayout.h
@@ -119,8 +119,8 @@ extern void objio_free_lseg(struct pnfs_layout_segment *lseg);
*/
extern void objio_free_result(struct objlayout_io_res *oir);
-extern int objio_read_pagelist(struct nfs_pgio_data *rdata);
-extern int objio_write_pagelist(struct nfs_pgio_data *wdata, int how);
+extern int objio_read_pagelist(struct nfs_pgio_header *rdata);
+extern int objio_write_pagelist(struct nfs_pgio_header *wdata, int how);
/*
* callback API
@@ -168,10 +168,10 @@ extern struct pnfs_layout_segment *objlayout_alloc_lseg(
extern void objlayout_free_lseg(struct pnfs_layout_segment *);
extern enum pnfs_try_status objlayout_read_pagelist(
- struct nfs_pgio_data *);
+ struct nfs_pgio_header *);
extern enum pnfs_try_status objlayout_write_pagelist(
- struct nfs_pgio_data *,
+ struct nfs_pgio_header *,
int how);
extern void objlayout_encode_layoutcommit(
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 17fab89f6358..be7cbce6e4c7 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -115,8 +115,8 @@ __nfs_iocounter_wait(struct nfs_io_counter *c)
set_bit(NFS_IO_INPROGRESS, &c->flags);
if (atomic_read(&c->io_count) == 0)
break;
- ret = nfs_wait_bit_killable(&c->flags);
- } while (atomic_read(&c->io_count) != 0);
+ ret = nfs_wait_bit_killable(&q.key);
+ } while (atomic_read(&c->io_count) != 0 && !ret);
finish_wait(wq, &q.wait);
return ret;
}
@@ -136,28 +136,52 @@ nfs_iocounter_wait(struct nfs_io_counter *c)
return __nfs_iocounter_wait(c);
}
-static int nfs_wait_bit_uninterruptible(void *word)
-{
- io_schedule();
- return 0;
-}
-
/*
* nfs_page_group_lock - lock the head of the page group
* @req - request in group that is to be locked
+ * @nonblock - if true don't block waiting for lock
*
* this lock must be held if modifying the page group list
+ *
+ * return 0 on success, < 0 on error: -EDELAY if nonblocking or the
+ * result from wait_on_bit_lock
+ *
+ * NOTE: calling with nonblock=false should always have set the
+ * lock bit (see fs/buffer.c and other uses of wait_on_bit_lock
+ * with TASK_UNINTERRUPTIBLE), so there is no need to check the result.
+ */
+int
+nfs_page_group_lock(struct nfs_page *req, bool nonblock)
+{
+ struct nfs_page *head = req->wb_head;
+
+ WARN_ON_ONCE(head != head->wb_head);
+
+ if (!test_and_set_bit(PG_HEADLOCK, &head->wb_flags))
+ return 0;
+
+ if (!nonblock)
+ return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
+ TASK_UNINTERRUPTIBLE);
+
+ return -EAGAIN;
+}
+
+/*
+ * nfs_page_group_lock_wait - wait for the lock to clear, but don't grab it
+ * @req - a request in the group
+ *
+ * This is a blocking call to wait for the group lock to be cleared.
*/
void
-nfs_page_group_lock(struct nfs_page *req)
+nfs_page_group_lock_wait(struct nfs_page *req)
{
struct nfs_page *head = req->wb_head;
WARN_ON_ONCE(head != head->wb_head);
- wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
- nfs_wait_bit_uninterruptible,
- TASK_UNINTERRUPTIBLE);
+ wait_on_bit(&head->wb_flags, PG_HEADLOCK,
+ TASK_UNINTERRUPTIBLE);
}
/*
@@ -218,7 +242,7 @@ bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit)
{
bool ret;
- nfs_page_group_lock(req);
+ nfs_page_group_lock(req, false);
ret = nfs_page_group_sync_on_bit_locked(req, bit);
nfs_page_group_unlock(req);
@@ -435,9 +459,8 @@ void nfs_release_request(struct nfs_page *req)
int
nfs_wait_on_request(struct nfs_page *req)
{
- return wait_on_bit(&req->wb_flags, PG_BUSY,
- nfs_wait_bit_uninterruptible,
- TASK_UNINTERRUPTIBLE);
+ return wait_on_bit_io(&req->wb_flags, PG_BUSY,
+ TASK_UNINTERRUPTIBLE);
}
/*
@@ -462,123 +485,72 @@ size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
}
EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
-static inline struct nfs_rw_header *NFS_RW_HEADER(struct nfs_pgio_header *hdr)
+struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *ops)
{
- return container_of(hdr, struct nfs_rw_header, header);
-}
-
-/**
- * nfs_rw_header_alloc - Allocate a header for a read or write
- * @ops: Read or write function vector
- */
-struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *ops)
-{
- struct nfs_rw_header *header = ops->rw_alloc_header();
-
- if (header) {
- struct nfs_pgio_header *hdr = &header->header;
+ struct nfs_pgio_header *hdr = ops->rw_alloc_header();
+ if (hdr) {
INIT_LIST_HEAD(&hdr->pages);
spin_lock_init(&hdr->lock);
- atomic_set(&hdr->refcnt, 0);
hdr->rw_ops = ops;
}
- return header;
+ return hdr;
}
-EXPORT_SYMBOL_GPL(nfs_rw_header_alloc);
+EXPORT_SYMBOL_GPL(nfs_pgio_header_alloc);
/*
- * nfs_rw_header_free - Free a read or write header
+ * nfs_pgio_header_free - Free a read or write header
* @hdr: The header to free
*/
-void nfs_rw_header_free(struct nfs_pgio_header *hdr)
+void nfs_pgio_header_free(struct nfs_pgio_header *hdr)
{
- hdr->rw_ops->rw_free_header(NFS_RW_HEADER(hdr));
+ hdr->rw_ops->rw_free_header(hdr);
}
-EXPORT_SYMBOL_GPL(nfs_rw_header_free);
+EXPORT_SYMBOL_GPL(nfs_pgio_header_free);
/**
- * nfs_pgio_data_alloc - Allocate pageio data
- * @hdr: The header making a request
- * @pagecount: Number of pages to create
- */
-static struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *hdr,
- unsigned int pagecount)
-{
- struct nfs_pgio_data *data, *prealloc;
-
- prealloc = &NFS_RW_HEADER(hdr)->rpc_data;
- if (prealloc->header == NULL)
- data = prealloc;
- else
- data = kzalloc(sizeof(*data), GFP_KERNEL);
- if (!data)
- goto out;
-
- if (nfs_pgarray_set(&data->pages, pagecount)) {
- data->header = hdr;
- atomic_inc(&hdr->refcnt);
- } else {
- if (data != prealloc)
- kfree(data);
- data = NULL;
- }
-out:
- return data;
-}
-
-/**
- * nfs_pgio_data_release - Properly free pageio data
- * @data: The data to release
+ * nfs_pgio_data_destroy - make @hdr suitable for reuse
+ *
+ * Frees memory and releases refs from nfs_generic_pgio, so that it may
+ * be called again.
+ *
+ * @hdr: A header that has had nfs_generic_pgio called
*/
-void nfs_pgio_data_release(struct nfs_pgio_data *data)
+void nfs_pgio_data_destroy(struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = data->header;
- struct nfs_rw_header *pageio_header = NFS_RW_HEADER(hdr);
-
- put_nfs_open_context(data->args.context);
- if (data->pages.pagevec != data->pages.page_array)
- kfree(data->pages.pagevec);
- if (data == &pageio_header->rpc_data) {
- data->header = NULL;
- data = NULL;
- }
- if (atomic_dec_and_test(&hdr->refcnt))
- hdr->completion_ops->completion(hdr);
- /* Note: we only free the rpc_task after callbacks are done.
- * See the comment in rpc_free_task() for why
- */
- kfree(data);
+ put_nfs_open_context(hdr->args.context);
+ if (hdr->page_array.pagevec != hdr->page_array.page_array)
+ kfree(hdr->page_array.pagevec);
}
-EXPORT_SYMBOL_GPL(nfs_pgio_data_release);
+EXPORT_SYMBOL_GPL(nfs_pgio_data_destroy);
/**
* nfs_pgio_rpcsetup - Set up arguments for a pageio call
- * @data: The pageio data
+ * @hdr: The pageio hdr
* @count: Number of bytes to read
* @offset: Initial offset
* @how: How to commit data (writes only)
* @cinfo: Commit information for the call (writes only)
*/
-static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data,
+static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr,
unsigned int count, unsigned int offset,
int how, struct nfs_commit_info *cinfo)
{
- struct nfs_page *req = data->header->req;
+ struct nfs_page *req = hdr->req;
/* Set up the RPC argument and reply structs
- * NB: take care not to mess about with data->commit et al. */
+ * NB: take care not to mess about with hdr->commit et al. */
- data->args.fh = NFS_FH(data->header->inode);
- data->args.offset = req_offset(req) + offset;
+ hdr->args.fh = NFS_FH(hdr->inode);
+ hdr->args.offset = req_offset(req) + offset;
/* pnfs_set_layoutcommit needs this */
- data->mds_offset = data->args.offset;
- data->args.pgbase = req->wb_pgbase + offset;
- data->args.pages = data->pages.pagevec;
- data->args.count = count;
- data->args.context = get_nfs_open_context(req->wb_context);
- data->args.lock_context = req->wb_lock_context;
- data->args.stable = NFS_UNSTABLE;
+ hdr->mds_offset = hdr->args.offset;
+ hdr->args.pgbase = req->wb_pgbase + offset;
+ hdr->args.pages = hdr->page_array.pagevec;
+ hdr->args.count = count;
+ hdr->args.context = get_nfs_open_context(req->wb_context);
+ hdr->args.lock_context = req->wb_lock_context;
+ hdr->args.stable = NFS_UNSTABLE;
switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
case 0:
break;
@@ -586,59 +558,59 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data,
if (nfs_reqs_to_commit(cinfo))
break;
default:
- data->args.stable = NFS_FILE_SYNC;
+ hdr->args.stable = NFS_FILE_SYNC;
}
- data->res.fattr = &data->fattr;
- data->res.count = count;
- data->res.eof = 0;
- data->res.verf = &data->verf;
- nfs_fattr_init(&data->fattr);
+ hdr->res.fattr = &hdr->fattr;
+ hdr->res.count = count;
+ hdr->res.eof = 0;
+ hdr->res.verf = &hdr->verf;
+ nfs_fattr_init(&hdr->fattr);
}
/**
- * nfs_pgio_prepare - Prepare pageio data to go over the wire
+ * nfs_pgio_prepare - Prepare pageio hdr to go over the wire
* @task: The current task
- * @calldata: pageio data to prepare
+ * @calldata: pageio header to prepare
*/
static void nfs_pgio_prepare(struct rpc_task *task, void *calldata)
{
- struct nfs_pgio_data *data = calldata;
+ struct nfs_pgio_header *hdr = calldata;
int err;
- err = NFS_PROTO(data->header->inode)->pgio_rpc_prepare(task, data);
+ err = NFS_PROTO(hdr->inode)->pgio_rpc_prepare(task, hdr);
if (err)
rpc_exit(task, err);
}
-int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_data *data,
+int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr,
const struct rpc_call_ops *call_ops, int how, int flags)
{
struct rpc_task *task;
struct rpc_message msg = {
- .rpc_argp = &data->args,
- .rpc_resp = &data->res,
- .rpc_cred = data->header->cred,
+ .rpc_argp = &hdr->args,
+ .rpc_resp = &hdr->res,
+ .rpc_cred = hdr->cred,
};
struct rpc_task_setup task_setup_data = {
.rpc_client = clnt,
- .task = &data->task,
+ .task = &hdr->task,
.rpc_message = &msg,
.callback_ops = call_ops,
- .callback_data = data,
+ .callback_data = hdr,
.workqueue = nfsiod_workqueue,
.flags = RPC_TASK_ASYNC | flags,
};
int ret = 0;
- data->header->rw_ops->rw_initiate(data, &msg, &task_setup_data, how);
+ hdr->rw_ops->rw_initiate(hdr, &msg, &task_setup_data, how);
dprintk("NFS: %5u initiated pgio call "
"(req %s/%llu, %u bytes @ offset %llu)\n",
- data->task.tk_pid,
- data->header->inode->i_sb->s_id,
- (unsigned long long)NFS_FILEID(data->header->inode),
- data->args.count,
- (unsigned long long)data->args.offset);
+ hdr->task.tk_pid,
+ hdr->inode->i_sb->s_id,
+ (unsigned long long)NFS_FILEID(hdr->inode),
+ hdr->args.count,
+ (unsigned long long)hdr->args.offset);
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task)) {
@@ -665,22 +637,23 @@ static int nfs_pgio_error(struct nfs_pageio_descriptor *desc,
struct nfs_pgio_header *hdr)
{
set_bit(NFS_IOHDR_REDO, &hdr->flags);
- nfs_pgio_data_release(hdr->data);
- hdr->data = NULL;
+ nfs_pgio_data_destroy(hdr);
+ hdr->completion_ops->completion(hdr);
desc->pg_completion_ops->error_cleanup(&desc->pg_list);
return -ENOMEM;
}
/**
* nfs_pgio_release - Release pageio data
- * @calldata: The pageio data to release
+ * @calldata: The pageio header to release
*/
static void nfs_pgio_release(void *calldata)
{
- struct nfs_pgio_data *data = calldata;
- if (data->header->rw_ops->rw_release)
- data->header->rw_ops->rw_release(data);
- nfs_pgio_data_release(data);
+ struct nfs_pgio_header *hdr = calldata;
+ if (hdr->rw_ops->rw_release)
+ hdr->rw_ops->rw_release(hdr);
+ nfs_pgio_data_destroy(hdr);
+ hdr->completion_ops->completion(hdr);
}
/**
@@ -721,22 +694,22 @@ EXPORT_SYMBOL_GPL(nfs_pageio_init);
/**
* nfs_pgio_result - Basic pageio error handling
* @task: The task that ran
- * @calldata: Pageio data to check
+ * @calldata: Pageio header to check
*/
static void nfs_pgio_result(struct rpc_task *task, void *calldata)
{
- struct nfs_pgio_data *data = calldata;
- struct inode *inode = data->header->inode;
+ struct nfs_pgio_header *hdr = calldata;
+ struct inode *inode = hdr->inode;
dprintk("NFS: %s: %5u, (status %d)\n", __func__,
task->tk_pid, task->tk_status);
- if (data->header->rw_ops->rw_done(task, data, inode) != 0)
+ if (hdr->rw_ops->rw_done(task, hdr, inode) != 0)
return;
if (task->tk_status < 0)
- nfs_set_pgio_error(data->header, task->tk_status, data->args.offset);
+ nfs_set_pgio_error(hdr, task->tk_status, hdr->args.offset);
else
- data->header->rw_ops->rw_result(task, data);
+ hdr->rw_ops->rw_result(task, hdr);
}
/*
@@ -751,32 +724,42 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
struct nfs_pgio_header *hdr)
{
struct nfs_page *req;
- struct page **pages;
- struct nfs_pgio_data *data;
+ struct page **pages,
+ *last_page;
struct list_head *head = &desc->pg_list;
struct nfs_commit_info cinfo;
+ unsigned int pagecount, pageused;
- data = nfs_pgio_data_alloc(hdr, nfs_page_array_len(desc->pg_base,
- desc->pg_count));
- if (!data)
+ pagecount = nfs_page_array_len(desc->pg_base, desc->pg_count);
+ if (!nfs_pgarray_set(&hdr->page_array, pagecount))
return nfs_pgio_error(desc, hdr);
nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
- pages = data->pages.pagevec;
+ pages = hdr->page_array.pagevec;
+ last_page = NULL;
+ pageused = 0;
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
nfs_list_add_request(req, &hdr->pages);
- *pages++ = req->wb_page;
+
+ if (WARN_ON_ONCE(pageused >= pagecount))
+ return nfs_pgio_error(desc, hdr);
+
+ if (!last_page || last_page != req->wb_page) {
+ *pages++ = last_page = req->wb_page;
+ pageused++;
+ }
}
+ if (WARN_ON_ONCE(pageused != pagecount))
+ return nfs_pgio_error(desc, hdr);
if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
(desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
desc->pg_ioflags &= ~FLUSH_COND_STABLE;
/* Set up the argument struct */
- nfs_pgio_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
- hdr->data = data;
+ nfs_pgio_rpcsetup(hdr, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
desc->pg_rpc_callops = &nfs_pgio_common_ops;
return 0;
}
@@ -784,25 +767,20 @@ EXPORT_SYMBOL_GPL(nfs_generic_pgio);
static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc)
{
- struct nfs_rw_header *rw_hdr;
struct nfs_pgio_header *hdr;
int ret;
- rw_hdr = nfs_rw_header_alloc(desc->pg_rw_ops);
- if (!rw_hdr) {
+ hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
+ if (!hdr) {
desc->pg_completion_ops->error_cleanup(&desc->pg_list);
return -ENOMEM;
}
- hdr = &rw_hdr->header;
- nfs_pgheader_init(desc, hdr, nfs_rw_header_free);
- atomic_inc(&hdr->refcnt);
+ nfs_pgheader_init(desc, hdr, nfs_pgio_header_free);
ret = nfs_generic_pgio(desc, hdr);
if (ret == 0)
ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode),
- hdr->data, desc->pg_rpc_callops,
+ hdr, desc->pg_rpc_callops,
desc->pg_ioflags, 0);
- if (atomic_dec_and_test(&hdr->refcnt))
- hdr->completion_ops->completion(hdr);
return ret;
}
@@ -845,6 +823,14 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
return false;
if (req_offset(req) != req_offset(prev) + prev->wb_bytes)
return false;
+ if (req->wb_page == prev->wb_page) {
+ if (req->wb_pgbase != prev->wb_pgbase + prev->wb_bytes)
+ return false;
+ } else {
+ if (req->wb_pgbase != 0 ||
+ prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
+ return false;
+ }
}
size = pgio->pg_ops->pg_test(pgio, prev, req);
WARN_ON_ONCE(size > req->wb_bytes);
@@ -916,7 +902,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
unsigned int bytes_left = 0;
unsigned int offset, pgbase;
- nfs_page_group_lock(req);
+ nfs_page_group_lock(req, false);
subreq = req;
bytes_left = subreq->wb_bytes;
@@ -938,7 +924,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
if (desc->pg_recoalesce)
return 0;
/* retry add_request for this subreq */
- nfs_page_group_lock(req);
+ nfs_page_group_lock(req, false);
continue;
}
@@ -1013,7 +999,38 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
} while (ret);
return ret;
}
-EXPORT_SYMBOL_GPL(nfs_pageio_add_request);
+
+/*
+ * nfs_pageio_resend - Transfer requests to new descriptor and resend
+ * @hdr - the pgio header to move request from
+ * @desc - the pageio descriptor to add requests to
+ *
+ * Try to move each request (nfs_page) from @hdr to @desc then attempt
+ * to send them.
+ *
+ * Returns 0 on success and < 0 on error.
+ */
+int nfs_pageio_resend(struct nfs_pageio_descriptor *desc,
+ struct nfs_pgio_header *hdr)
+{
+ LIST_HEAD(failed);
+
+ desc->pg_dreq = hdr->dreq;
+ while (!list_empty(&hdr->pages)) {
+ struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+
+ nfs_list_remove_request(req);
+ if (!nfs_pageio_add_request(desc, req))
+ nfs_list_add_request(req, &failed);
+ }
+ nfs_pageio_complete(desc);
+ if (!list_empty(&failed)) {
+ list_move(&failed, &hdr->pages);
+ return -EIO;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nfs_pageio_resend);
/**
* nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor
@@ -1029,7 +1046,6 @@ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc)
break;
}
}
-EXPORT_SYMBOL_GPL(nfs_pageio_complete);
/**
* nfs_pageio_cond_complete - Conditional I/O completion
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 6fdcd233d6f7..a3851debf8a2 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -361,6 +361,23 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg)
}
EXPORT_SYMBOL_GPL(pnfs_put_lseg);
+static void pnfs_put_lseg_async_work(struct work_struct *work)
+{
+ struct pnfs_layout_segment *lseg;
+
+ lseg = container_of(work, struct pnfs_layout_segment, pls_work);
+
+ pnfs_put_lseg(lseg);
+}
+
+void
+pnfs_put_lseg_async(struct pnfs_layout_segment *lseg)
+{
+ INIT_WORK(&lseg->pls_work, pnfs_put_lseg_async_work);
+ schedule_work(&lseg->pls_work);
+}
+EXPORT_SYMBOL_GPL(pnfs_put_lseg_async);
+
static u64
end_offset(u64 start, u64 len)
{
@@ -1470,41 +1487,19 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
-int pnfs_write_done_resend_to_mds(struct inode *inode,
- struct list_head *head,
- const struct nfs_pgio_completion_ops *compl_ops,
- struct nfs_direct_req *dreq)
+int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr)
{
struct nfs_pageio_descriptor pgio;
- LIST_HEAD(failed);
/* Resend all requests through the MDS */
- nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, true, compl_ops);
- pgio.pg_dreq = dreq;
- while (!list_empty(head)) {
- struct nfs_page *req = nfs_list_entry(head->next);
-
- nfs_list_remove_request(req);
- if (!nfs_pageio_add_request(&pgio, req))
- nfs_list_add_request(req, &failed);
- }
- nfs_pageio_complete(&pgio);
-
- if (!list_empty(&failed)) {
- /* For some reason our attempt to resend pages. Mark the
- * overall send request as having failed, and let
- * nfs_writeback_release_full deal with the error.
- */
- list_move(&failed, head);
- return -EIO;
- }
- return 0;
+ nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true,
+ hdr->completion_ops);
+ return nfs_pageio_resend(&pgio, hdr);
}
EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
-static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data)
+static void pnfs_ld_handle_write_error(struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = data->header;
dprintk("pnfs write error = %d\n", hdr->pnfs_error);
if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
@@ -1512,50 +1507,42 @@ static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data)
pnfs_return_layout(hdr->inode);
}
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
- data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
- &hdr->pages,
- hdr->completion_ops,
- hdr->dreq);
+ hdr->task.tk_status = pnfs_write_done_resend_to_mds(hdr);
}
/*
* Called by non rpc-based layout drivers
*/
-void pnfs_ld_write_done(struct nfs_pgio_data *data)
+void pnfs_ld_write_done(struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = data->header;
-
- trace_nfs4_pnfs_write(data, hdr->pnfs_error);
+ trace_nfs4_pnfs_write(hdr, hdr->pnfs_error);
if (!hdr->pnfs_error) {
- pnfs_set_layoutcommit(data);
- hdr->mds_ops->rpc_call_done(&data->task, data);
+ pnfs_set_layoutcommit(hdr);
+ hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
} else
- pnfs_ld_handle_write_error(data);
- hdr->mds_ops->rpc_release(data);
+ pnfs_ld_handle_write_error(hdr);
+ hdr->mds_ops->rpc_release(hdr);
}
EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
static void
pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_data *data)
+ struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = data->header;
-
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
list_splice_tail_init(&hdr->pages, &desc->pg_list);
nfs_pageio_reset_write_mds(desc);
desc->pg_recoalesce = 1;
}
- nfs_pgio_data_release(data);
+ nfs_pgio_data_destroy(hdr);
}
static enum pnfs_try_status
-pnfs_try_to_write_data(struct nfs_pgio_data *wdata,
+pnfs_try_to_write_data(struct nfs_pgio_header *hdr,
const struct rpc_call_ops *call_ops,
struct pnfs_layout_segment *lseg,
int how)
{
- struct nfs_pgio_header *hdr = wdata->header;
struct inode *inode = hdr->inode;
enum pnfs_try_status trypnfs;
struct nfs_server *nfss = NFS_SERVER(inode);
@@ -1563,8 +1550,8 @@ pnfs_try_to_write_data(struct nfs_pgio_data *wdata,
hdr->mds_ops = call_ops;
dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
- inode->i_ino, wdata->args.count, wdata->args.offset, how);
- trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how);
+ inode->i_ino, hdr->args.count, hdr->args.offset, how);
+ trypnfs = nfss->pnfs_curr_ld->write_pagelist(hdr, how);
if (trypnfs != PNFS_NOT_ATTEMPTED)
nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
@@ -1575,139 +1562,105 @@ static void
pnfs_do_write(struct nfs_pageio_descriptor *desc,
struct nfs_pgio_header *hdr, int how)
{
- struct nfs_pgio_data *data = hdr->data;
const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
struct pnfs_layout_segment *lseg = desc->pg_lseg;
enum pnfs_try_status trypnfs;
desc->pg_lseg = NULL;
- trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
+ trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how);
if (trypnfs == PNFS_NOT_ATTEMPTED)
- pnfs_write_through_mds(desc, data);
+ pnfs_write_through_mds(desc, hdr);
pnfs_put_lseg(lseg);
}
static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
{
pnfs_put_lseg(hdr->lseg);
- nfs_rw_header_free(hdr);
+ nfs_pgio_header_free(hdr);
}
EXPORT_SYMBOL_GPL(pnfs_writehdr_free);
int
pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
{
- struct nfs_rw_header *whdr;
struct nfs_pgio_header *hdr;
int ret;
- whdr = nfs_rw_header_alloc(desc->pg_rw_ops);
- if (!whdr) {
+ hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
+ if (!hdr) {
desc->pg_completion_ops->error_cleanup(&desc->pg_list);
pnfs_put_lseg(desc->pg_lseg);
desc->pg_lseg = NULL;
return -ENOMEM;
}
- hdr = &whdr->header;
nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
- atomic_inc(&hdr->refcnt);
ret = nfs_generic_pgio(desc, hdr);
if (ret != 0) {
pnfs_put_lseg(desc->pg_lseg);
desc->pg_lseg = NULL;
} else
pnfs_do_write(desc, hdr, desc->pg_ioflags);
- if (atomic_dec_and_test(&hdr->refcnt))
- hdr->completion_ops->completion(hdr);
return ret;
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
-int pnfs_read_done_resend_to_mds(struct inode *inode,
- struct list_head *head,
- const struct nfs_pgio_completion_ops *compl_ops,
- struct nfs_direct_req *dreq)
+int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *hdr)
{
struct nfs_pageio_descriptor pgio;
- LIST_HEAD(failed);
/* Resend all requests through the MDS */
- nfs_pageio_init_read(&pgio, inode, true, compl_ops);
- pgio.pg_dreq = dreq;
- while (!list_empty(head)) {
- struct nfs_page *req = nfs_list_entry(head->next);
-
- nfs_list_remove_request(req);
- if (!nfs_pageio_add_request(&pgio, req))
- nfs_list_add_request(req, &failed);
- }
- nfs_pageio_complete(&pgio);
-
- if (!list_empty(&failed)) {
- list_move(&failed, head);
- return -EIO;
- }
- return 0;
+ nfs_pageio_init_read(&pgio, hdr->inode, true, hdr->completion_ops);
+ return nfs_pageio_resend(&pgio, hdr);
}
EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds);
-static void pnfs_ld_handle_read_error(struct nfs_pgio_data *data)
+static void pnfs_ld_handle_read_error(struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = data->header;
-
dprintk("pnfs read error = %d\n", hdr->pnfs_error);
if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
PNFS_LAYOUTRET_ON_ERROR) {
pnfs_return_layout(hdr->inode);
}
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
- data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
- &hdr->pages,
- hdr->completion_ops,
- hdr->dreq);
+ hdr->task.tk_status = pnfs_read_done_resend_to_mds(hdr);
}
/*
* Called by non rpc-based layout drivers
*/
-void pnfs_ld_read_done(struct nfs_pgio_data *data)
+void pnfs_ld_read_done(struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = data->header;
-
- trace_nfs4_pnfs_read(data, hdr->pnfs_error);
+ trace_nfs4_pnfs_read(hdr, hdr->pnfs_error);
if (likely(!hdr->pnfs_error)) {
- __nfs4_read_done_cb(data);
- hdr->mds_ops->rpc_call_done(&data->task, data);
+ __nfs4_read_done_cb(hdr);
+ hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
} else
- pnfs_ld_handle_read_error(data);
- hdr->mds_ops->rpc_release(data);
+ pnfs_ld_handle_read_error(hdr);
+ hdr->mds_ops->rpc_release(hdr);
}
EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
static void
pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_data *data)
+ struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = data->header;
-
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
list_splice_tail_init(&hdr->pages, &desc->pg_list);
nfs_pageio_reset_read_mds(desc);
desc->pg_recoalesce = 1;
}
- nfs_pgio_data_release(data);
+ nfs_pgio_data_destroy(hdr);
}
/*
* Call the appropriate parallel I/O subsystem read function.
*/
static enum pnfs_try_status
-pnfs_try_to_read_data(struct nfs_pgio_data *rdata,
+pnfs_try_to_read_data(struct nfs_pgio_header *hdr,
const struct rpc_call_ops *call_ops,
struct pnfs_layout_segment *lseg)
{
- struct nfs_pgio_header *hdr = rdata->header;
struct inode *inode = hdr->inode;
struct nfs_server *nfss = NFS_SERVER(inode);
enum pnfs_try_status trypnfs;
@@ -1715,9 +1668,9 @@ pnfs_try_to_read_data(struct nfs_pgio_data *rdata,
hdr->mds_ops = call_ops;
dprintk("%s: Reading ino:%lu %u@%llu\n",
- __func__, inode->i_ino, rdata->args.count, rdata->args.offset);
+ __func__, inode->i_ino, hdr->args.count, hdr->args.offset);
- trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata);
+ trypnfs = nfss->pnfs_curr_ld->read_pagelist(hdr);
if (trypnfs != PNFS_NOT_ATTEMPTED)
nfs_inc_stats(inode, NFSIOS_PNFS_READ);
dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
@@ -1727,52 +1680,46 @@ pnfs_try_to_read_data(struct nfs_pgio_data *rdata,
static void
pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_data *data = hdr->data;
const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
struct pnfs_layout_segment *lseg = desc->pg_lseg;
enum pnfs_try_status trypnfs;
desc->pg_lseg = NULL;
- trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
+ trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg);
if (trypnfs == PNFS_NOT_ATTEMPTED)
- pnfs_read_through_mds(desc, data);
+ pnfs_read_through_mds(desc, hdr);
pnfs_put_lseg(lseg);
}
static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
{
pnfs_put_lseg(hdr->lseg);
- nfs_rw_header_free(hdr);
+ nfs_pgio_header_free(hdr);
}
EXPORT_SYMBOL_GPL(pnfs_readhdr_free);
int
pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
{
- struct nfs_rw_header *rhdr;
struct nfs_pgio_header *hdr;
int ret;
- rhdr = nfs_rw_header_alloc(desc->pg_rw_ops);
- if (!rhdr) {
+ hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
+ if (!hdr) {
desc->pg_completion_ops->error_cleanup(&desc->pg_list);
ret = -ENOMEM;
pnfs_put_lseg(desc->pg_lseg);
desc->pg_lseg = NULL;
return ret;
}
- hdr = &rhdr->header;
nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
- atomic_inc(&hdr->refcnt);
ret = nfs_generic_pgio(desc, hdr);
if (ret != 0) {
pnfs_put_lseg(desc->pg_lseg);
desc->pg_lseg = NULL;
} else
pnfs_do_read(desc, hdr);
- if (atomic_dec_and_test(&hdr->refcnt))
- hdr->completion_ops->completion(hdr);
return ret;
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
@@ -1820,12 +1767,11 @@ void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
void
-pnfs_set_layoutcommit(struct nfs_pgio_data *wdata)
+pnfs_set_layoutcommit(struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = wdata->header;
struct inode *inode = hdr->inode;
struct nfs_inode *nfsi = NFS_I(inode);
- loff_t end_pos = wdata->mds_offset + wdata->res.count;
+ loff_t end_pos = hdr->mds_offset + hdr->res.count;
bool mark_as_dirty = false;
spin_lock(&inode->i_lock);
@@ -1885,7 +1831,7 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) {
if (!sync)
goto out;
- status = wait_on_bit_lock(&nfsi->flags,
+ status = wait_on_bit_lock_action(&nfsi->flags,
NFS_INO_LAYOUTCOMMITTING,
nfs_wait_bit_killable,
TASK_KILLABLE);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 4fb309a2b4c4..aca3dff5dae6 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -32,6 +32,7 @@
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
+#include <linux/workqueue.h>
enum {
NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */
@@ -46,6 +47,7 @@ struct pnfs_layout_segment {
atomic_t pls_refcount;
unsigned long pls_flags;
struct pnfs_layout_hdr *pls_layout;
+ struct work_struct pls_work;
};
enum pnfs_try_status {
@@ -104,6 +106,8 @@ struct pnfs_layoutdriver_type {
int max);
void (*recover_commit_reqs) (struct list_head *list,
struct nfs_commit_info *cinfo);
+ struct nfs_page * (*search_commit_reqs)(struct nfs_commit_info *cinfo,
+ struct page *page);
int (*commit_pagelist)(struct inode *inode,
struct list_head *mds_pages,
int how,
@@ -113,8 +117,8 @@ struct pnfs_layoutdriver_type {
* Return PNFS_ATTEMPTED to indicate the layout code has attempted
* I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
*/
- enum pnfs_try_status (*read_pagelist) (struct nfs_pgio_data *nfs_data);
- enum pnfs_try_status (*write_pagelist) (struct nfs_pgio_data *nfs_data, int how);
+ enum pnfs_try_status (*read_pagelist)(struct nfs_pgio_header *);
+ enum pnfs_try_status (*write_pagelist)(struct nfs_pgio_header *, int);
void (*free_deviceid_node) (struct nfs4_deviceid_node *);
@@ -179,6 +183,7 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
/* pnfs.c */
void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo);
void pnfs_put_lseg(struct pnfs_layout_segment *lseg);
+void pnfs_put_lseg_async(struct pnfs_layout_segment *lseg);
void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32);
void unset_pnfs_layoutdriver(struct nfs_server *);
@@ -213,13 +218,13 @@ bool pnfs_roc(struct inode *ino);
void pnfs_roc_release(struct inode *ino);
void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task);
-void pnfs_set_layoutcommit(struct nfs_pgio_data *wdata);
+void pnfs_set_layoutcommit(struct nfs_pgio_header *);
void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
int _pnfs_return_layout(struct inode *);
int pnfs_commit_and_return_layout(struct inode *);
-void pnfs_ld_write_done(struct nfs_pgio_data *);
-void pnfs_ld_read_done(struct nfs_pgio_data *);
+void pnfs_ld_write_done(struct nfs_pgio_header *);
+void pnfs_ld_read_done(struct nfs_pgio_header *);
struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
struct nfs_open_context *ctx,
loff_t pos,
@@ -228,12 +233,8 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
gfp_t gfp_flags);
void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp);
-int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head,
- const struct nfs_pgio_completion_ops *compl_ops,
- struct nfs_direct_req *dreq);
-int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head,
- const struct nfs_pgio_completion_ops *compl_ops,
- struct nfs_direct_req *dreq);
+int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *);
+int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *);
struct nfs4_threshold *pnfs_mdsthreshold_alloc(void);
/* nfs4_deviceid_flags */
@@ -345,6 +346,17 @@ pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list,
NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo);
}
+static inline struct nfs_page *
+pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo,
+ struct page *page)
+{
+ struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
+
+ if (ld == NULL || ld->search_commit_reqs == NULL)
+ return NULL;
+ return ld->search_commit_reqs(cinfo, page);
+}
+
/* Should the pNFS client commit and return the layout upon a setattr */
static inline bool
pnfs_ld_layoutret_on_setattr(struct inode *inode)
@@ -410,6 +422,10 @@ static inline void pnfs_put_lseg(struct pnfs_layout_segment *lseg)
{
}
+static inline void pnfs_put_lseg_async(struct pnfs_layout_segment *lseg)
+{
+}
+
static inline int pnfs_return_layout(struct inode *ino)
{
return 0;
@@ -496,6 +512,13 @@ pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list,
{
}
+static inline struct nfs_page *
+pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo,
+ struct page *page)
+{
+ return NULL;
+}
+
static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
{
return 0;
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index c171ce1a8a30..b09cc23d6f43 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -578,46 +578,49 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
return 0;
}
-static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
{
- struct inode *inode = data->header->inode;
+ struct inode *inode = hdr->inode;
nfs_invalidate_atime(inode);
if (task->tk_status >= 0) {
- nfs_refresh_inode(inode, data->res.fattr);
+ nfs_refresh_inode(inode, hdr->res.fattr);
/* Emulate the eof flag, which isn't normally needed in NFSv2
* as it is guaranteed to always return the file attributes
*/
- if (data->args.offset + data->res.count >= data->res.fattr->size)
- data->res.eof = 1;
+ if (hdr->args.offset + hdr->res.count >= hdr->res.fattr->size)
+ hdr->res.eof = 1;
}
return 0;
}
-static void nfs_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
+static void nfs_proc_read_setup(struct nfs_pgio_header *hdr,
+ struct rpc_message *msg)
{
msg->rpc_proc = &nfs_procedures[NFSPROC_READ];
}
-static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task,
+ struct nfs_pgio_header *hdr)
{
rpc_call_start(task);
return 0;
}
-static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
{
- struct inode *inode = data->header->inode;
+ struct inode *inode = hdr->inode;
if (task->tk_status >= 0)
- nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
+ nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr);
return 0;
}
-static void nfs_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
+static void nfs_proc_write_setup(struct nfs_pgio_header *hdr,
+ struct rpc_message *msg)
{
/* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */
- data->args.stable = NFS_FILE_SYNC;
+ hdr->args.stable = NFS_FILE_SYNC;
msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE];
}
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index e818a475ca64..beff2769c5c5 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -33,12 +33,12 @@ static const struct nfs_rw_ops nfs_rw_read_ops;
static struct kmem_cache *nfs_rdata_cachep;
-static struct nfs_rw_header *nfs_readhdr_alloc(void)
+static struct nfs_pgio_header *nfs_readhdr_alloc(void)
{
return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
}
-static void nfs_readhdr_free(struct nfs_rw_header *rhdr)
+static void nfs_readhdr_free(struct nfs_pgio_header *rhdr)
{
kmem_cache_free(nfs_rdata_cachep, rhdr);
}
@@ -115,12 +115,6 @@ static void nfs_readpage_release(struct nfs_page *req)
unlock_page(req->wb_page);
}
-
- dprintk("NFS: read done (%s/%Lu %d@%Ld)\n",
- req->wb_context->dentry->d_inode->i_sb->s_id,
- (unsigned long long)NFS_FILEID(req->wb_context->dentry->d_inode),
- req->wb_bytes,
- (long long)req_offset(req));
nfs_release_request(req);
}
@@ -172,14 +166,15 @@ out:
hdr->release(hdr);
}
-static void nfs_initiate_read(struct nfs_pgio_data *data, struct rpc_message *msg,
+static void nfs_initiate_read(struct nfs_pgio_header *hdr,
+ struct rpc_message *msg,
struct rpc_task_setup *task_setup_data, int how)
{
- struct inode *inode = data->header->inode;
+ struct inode *inode = hdr->inode;
int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
task_setup_data->flags |= swap_flags;
- NFS_PROTO(inode)->read_setup(data, msg);
+ NFS_PROTO(inode)->read_setup(hdr, msg);
}
static void
@@ -203,14 +198,15 @@ static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = {
* This is the callback from RPC telling us whether a reply was
* received or some error occurred (timeout or socket shutdown).
*/
-static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data,
+static int nfs_readpage_done(struct rpc_task *task,
+ struct nfs_pgio_header *hdr,
struct inode *inode)
{
- int status = NFS_PROTO(inode)->read_done(task, data);
+ int status = NFS_PROTO(inode)->read_done(task, hdr);
if (status != 0)
return status;
- nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, data->res.count);
+ nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, hdr->res.count);
if (task->tk_status == -ESTALE) {
set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
@@ -219,34 +215,34 @@ static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data,
return 0;
}
-static void nfs_readpage_retry(struct rpc_task *task, struct nfs_pgio_data *data)
+static void nfs_readpage_retry(struct rpc_task *task,
+ struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_args *argp = &data->args;
- struct nfs_pgio_res *resp = &data->res;
+ struct nfs_pgio_args *argp = &hdr->args;
+ struct nfs_pgio_res *resp = &hdr->res;
/* This is a short read! */
- nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD);
+ nfs_inc_stats(hdr->inode, NFSIOS_SHORTREAD);
/* Has the server at least made some progress? */
if (resp->count == 0) {
- nfs_set_pgio_error(data->header, -EIO, argp->offset);
+ nfs_set_pgio_error(hdr, -EIO, argp->offset);
return;
}
- /* Yes, so retry the read at the end of the data */
- data->mds_offset += resp->count;
+ /* Yes, so retry the read at the end of the hdr */
+ hdr->mds_offset += resp->count;
argp->offset += resp->count;
argp->pgbase += resp->count;
argp->count -= resp->count;
rpc_restart_call_prepare(task);
}
-static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *data)
+static void nfs_readpage_result(struct rpc_task *task,
+ struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = data->header;
-
- if (data->res.eof) {
+ if (hdr->res.eof) {
loff_t bound;
- bound = data->args.offset + data->res.count;
+ bound = hdr->args.offset + hdr->res.count;
spin_lock(&hdr->lock);
if (bound < hdr->io_start + hdr->good_bytes) {
set_bit(NFS_IOHDR_EOF, &hdr->flags);
@@ -254,8 +250,8 @@ static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *dat
hdr->good_bytes = bound - hdr->io_start;
}
spin_unlock(&hdr->lock);
- } else if (data->res.count != data->args.count)
- nfs_readpage_retry(task, data);
+ } else if (hdr->res.count != hdr->args.count)
+ nfs_readpage_retry(task, hdr);
}
/*
@@ -404,7 +400,7 @@ out:
int __init nfs_init_readpagecache(void)
{
nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
- sizeof(struct nfs_rw_header),
+ sizeof(struct nfs_pgio_header),
0, SLAB_HWCACHE_ALIGN,
NULL);
if (nfs_rdata_cachep == NULL)
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 084af1060d79..e4499d5b51e8 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1027,8 +1027,7 @@ static bool nfs_auth_info_add(struct nfs_auth_info *auth_info,
rpc_authflavor_t flavor)
{
unsigned int i;
- unsigned int max_flavor_len = (sizeof(auth_info->flavors) /
- sizeof(auth_info->flavors[0]));
+ unsigned int max_flavor_len = ARRAY_SIZE(auth_info->flavors);
/* make sure this flavor isn't already in the list */
for (i = 0; i < auth_info->flavor_len; i++) {
@@ -2180,7 +2179,7 @@ out_no_address:
return -EINVAL;
}
-#define NFS_MOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \
+#define NFS_REMOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \
| NFS_MOUNT_SECURE \
| NFS_MOUNT_TCP \
| NFS_MOUNT_VER3 \
@@ -2188,15 +2187,16 @@ out_no_address:
| NFS_MOUNT_NONLM \
| NFS_MOUNT_BROKEN_SUID \
| NFS_MOUNT_STRICTLOCK \
- | NFS_MOUNT_UNSHARED \
- | NFS_MOUNT_NORESVPORT \
| NFS_MOUNT_LEGACY_INTERFACE)
+#define NFS_MOUNT_CMP_FLAGMASK (NFS_REMOUNT_CMP_FLAGMASK & \
+ ~(NFS_MOUNT_UNSHARED | NFS_MOUNT_NORESVPORT))
+
static int
nfs_compare_remount_data(struct nfs_server *nfss,
struct nfs_parsed_mount_data *data)
{
- if ((data->flags ^ nfss->flags) & NFS_MOUNT_CMP_FLAGMASK ||
+ if ((data->flags ^ nfss->flags) & NFS_REMOUNT_CMP_FLAGMASK ||
data->rsize != nfss->rsize ||
data->wsize != nfss->wsize ||
data->version != nfss->nfs_client->rpc_ops->version ||
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 5e2f10304548..175d5d073ccf 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -47,6 +47,8 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
static const struct nfs_rw_ops nfs_rw_write_ops;
static void nfs_clear_request_commit(struct nfs_page *req);
+static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
+ struct inode *inode);
static struct kmem_cache *nfs_wdata_cachep;
static mempool_t *nfs_wdata_mempool;
@@ -71,18 +73,18 @@ void nfs_commit_free(struct nfs_commit_data *p)
}
EXPORT_SYMBOL_GPL(nfs_commit_free);
-static struct nfs_rw_header *nfs_writehdr_alloc(void)
+static struct nfs_pgio_header *nfs_writehdr_alloc(void)
{
- struct nfs_rw_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
+ struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
if (p)
memset(p, 0, sizeof(*p));
return p;
}
-static void nfs_writehdr_free(struct nfs_rw_header *whdr)
+static void nfs_writehdr_free(struct nfs_pgio_header *hdr)
{
- mempool_free(whdr, nfs_wdata_mempool);
+ mempool_free(hdr, nfs_wdata_mempool);
}
static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
@@ -93,6 +95,38 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
}
/*
+ * nfs_page_search_commits_for_head_request_locked
+ *
+ * Search through commit lists on @inode for the head request for @page.
+ * Must be called while holding the inode (which is cinfo) lock.
+ *
+ * Returns the head request if found, or NULL if not found.
+ */
+static struct nfs_page *
+nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi,
+ struct page *page)
+{
+ struct nfs_page *freq, *t;
+ struct nfs_commit_info cinfo;
+ struct inode *inode = &nfsi->vfs_inode;
+
+ nfs_init_cinfo_from_inode(&cinfo, inode);
+
+ /* search through pnfs commit lists */
+ freq = pnfs_search_commit_reqs(inode, &cinfo, page);
+ if (freq)
+ return freq->wb_head;
+
+ /* Linearly search the commit list for the correct request */
+ list_for_each_entry_safe(freq, t, &cinfo.mds->list, wb_list) {
+ if (freq->wb_page == page)
+ return freq->wb_head;
+ }
+
+ return NULL;
+}
+
+/*
* nfs_page_find_head_request_locked - find head request associated with @page
*
* must be called while holding the inode lock.
@@ -106,21 +140,12 @@ nfs_page_find_head_request_locked(struct nfs_inode *nfsi, struct page *page)
if (PagePrivate(page))
req = (struct nfs_page *)page_private(page);
- else if (unlikely(PageSwapCache(page))) {
- struct nfs_page *freq, *t;
-
- /* Linearly search the commit list for the correct req */
- list_for_each_entry_safe(freq, t, &nfsi->commit_info.list, wb_list) {
- if (freq->wb_page == page) {
- req = freq->wb_head;
- break;
- }
- }
- }
+ else if (unlikely(PageSwapCache(page)))
+ req = nfs_page_search_commits_for_head_request_locked(nfsi,
+ page);
if (req) {
WARN_ON_ONCE(req->wb_head != req);
-
kref_get(&req->wb_kref);
}
@@ -216,7 +241,7 @@ static bool nfs_page_group_covers_page(struct nfs_page *req)
unsigned int pos = 0;
unsigned int len = nfs_page_length(req->wb_page);
- nfs_page_group_lock(req);
+ nfs_page_group_lock(req, false);
do {
tmp = nfs_page_group_search_locked(req->wb_head, pos);
@@ -379,8 +404,6 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
subreq->wb_head = subreq;
subreq->wb_this_page = subreq;
- nfs_clear_request_commit(subreq);
-
/* subreq is now totally disconnected from page group or any
* write / commit lists. last chance to wake any waiters */
nfs_unlock_request(subreq);
@@ -455,8 +478,23 @@ try_again:
return NULL;
}
+ /* holding inode lock, so always make a non-blocking call to try the
+ * page group lock */
+ ret = nfs_page_group_lock(head, true);
+ if (ret < 0) {
+ spin_unlock(&inode->i_lock);
+
+ if (!nonblock && ret == -EAGAIN) {
+ nfs_page_group_lock_wait(head);
+ nfs_release_request(head);
+ goto try_again;
+ }
+
+ nfs_release_request(head);
+ return ERR_PTR(ret);
+ }
+
/* lock each request in the page group */
- nfs_page_group_lock(head);
subreq = head;
do {
/*
@@ -488,7 +526,7 @@ try_again:
* Commit list removal accounting is done after locks are dropped */
subreq = head;
do {
- nfs_list_remove_request(subreq);
+ nfs_clear_request_commit(subreq);
subreq = subreq->wb_this_page;
} while (subreq != head);
@@ -518,15 +556,11 @@ try_again:
nfs_page_group_unlock(head);
- /* drop lock to clear_request_commit the head req and clean up
- * requests on destroy list */
+ /* drop lock to clean uprequests on destroy list */
spin_unlock(&inode->i_lock);
nfs_destroy_unlinked_subrequests(destroy_list, head);
- /* clean up commit list state */
- nfs_clear_request_commit(head);
-
/* still holds ref on head from nfs_page_find_head_request_locked
* and still has lock on head from lock loop */
return head;
@@ -623,7 +657,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
int err;
/* Stop dirtying of new pages while we sync */
- err = wait_on_bit_lock(bitlock, NFS_INO_FLUSHING,
+ err = wait_on_bit_lock_action(bitlock, NFS_INO_FLUSHING,
nfs_wait_bit_killable, TASK_KILLABLE);
if (err)
goto out_err;
@@ -705,6 +739,8 @@ static void nfs_inode_remove_request(struct nfs_page *req)
if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags))
nfs_release_request(req);
+ else
+ WARN_ON_ONCE(1);
}
static void
@@ -808,6 +844,7 @@ nfs_clear_page_commit(struct page *page)
dec_bdi_stat(page_file_mapping(page)->backing_dev_info, BDI_RECLAIMABLE);
}
+/* Called holding inode (/cinfo) lock */
static void
nfs_clear_request_commit(struct nfs_page *req)
{
@@ -817,20 +854,17 @@ nfs_clear_request_commit(struct nfs_page *req)
nfs_init_cinfo_from_inode(&cinfo, inode);
if (!pnfs_clear_request_commit(req, &cinfo)) {
- spin_lock(cinfo.lock);
nfs_request_remove_commit_list(req, &cinfo);
- spin_unlock(cinfo.lock);
}
nfs_clear_page_commit(req->wb_page);
}
}
-static inline
-int nfs_write_need_commit(struct nfs_pgio_data *data)
+int nfs_write_need_commit(struct nfs_pgio_header *hdr)
{
- if (data->verf.committed == NFS_DATA_SYNC)
- return data->header->lseg == NULL;
- return data->verf.committed != NFS_FILE_SYNC;
+ if (hdr->verf.committed == NFS_DATA_SYNC)
+ return hdr->lseg == NULL;
+ return hdr->verf.committed != NFS_FILE_SYNC;
}
#else
@@ -856,8 +890,7 @@ nfs_clear_request_commit(struct nfs_page *req)
{
}
-static inline
-int nfs_write_need_commit(struct nfs_pgio_data *data)
+int nfs_write_need_commit(struct nfs_pgio_header *hdr)
{
return 0;
}
@@ -883,11 +916,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
nfs_context_set_write_error(req->wb_context, hdr->error);
goto remove_req;
}
- if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) {
- nfs_mark_request_dirty(req);
- goto next;
- }
- if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
+ if (nfs_write_need_commit(hdr)) {
memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf));
nfs_mark_request_commit(req, hdr->lseg, &cinfo);
goto next;
@@ -1038,9 +1067,9 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
else
req->wb_bytes = rqend - req->wb_offset;
out_unlock:
- spin_unlock(&inode->i_lock);
if (req)
nfs_clear_request_commit(req);
+ spin_unlock(&inode->i_lock);
return req;
out_flushme:
spin_unlock(&inode->i_lock);
@@ -1241,17 +1270,18 @@ static int flush_task_priority(int how)
return RPC_PRIORITY_NORMAL;
}
-static void nfs_initiate_write(struct nfs_pgio_data *data, struct rpc_message *msg,
+static void nfs_initiate_write(struct nfs_pgio_header *hdr,
+ struct rpc_message *msg,
struct rpc_task_setup *task_setup_data, int how)
{
- struct inode *inode = data->header->inode;
+ struct inode *inode = hdr->inode;
int priority = flush_task_priority(how);
task_setup_data->priority = priority;
- NFS_PROTO(inode)->write_setup(data, msg);
+ NFS_PROTO(inode)->write_setup(hdr, msg);
nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client,
- &task_setup_data->rpc_client, msg, data);
+ &task_setup_data->rpc_client, msg, hdr);
}
/* If a nfs_flush_* function fails, it should remove reqs from @head and
@@ -1313,21 +1343,9 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata)
NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
}
-static void nfs_writeback_release_common(struct nfs_pgio_data *data)
+static void nfs_writeback_release_common(struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_header *hdr = data->header;
- int status = data->task.tk_status;
-
- if ((status >= 0) && nfs_write_need_commit(data)) {
- spin_lock(&hdr->lock);
- if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags))
- ; /* Do nothing */
- else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags))
- memcpy(&hdr->verf, &data->verf, sizeof(hdr->verf));
- else if (memcmp(&hdr->verf, &data->verf, sizeof(hdr->verf)))
- set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);
- spin_unlock(&hdr->lock);
- }
+ /* do nothing! */
}
/*
@@ -1358,7 +1376,8 @@ static int nfs_should_remove_suid(const struct inode *inode)
/*
* This function is called when the WRITE call is complete.
*/
-static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data,
+static int nfs_writeback_done(struct rpc_task *task,
+ struct nfs_pgio_header *hdr,
struct inode *inode)
{
int status;
@@ -1370,13 +1389,14 @@ static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data,
* another writer had changed the file, but some applications
* depend on tighter cache coherency when writing.
*/
- status = NFS_PROTO(inode)->write_done(task, data);
+ status = NFS_PROTO(inode)->write_done(task, hdr);
if (status != 0)
return status;
- nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, data->res.count);
+ nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count);
#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
- if (data->res.verf->committed < data->args.stable && task->tk_status >= 0) {
+ if (hdr->res.verf->committed < hdr->args.stable &&
+ task->tk_status >= 0) {
/* We tried a write call, but the server did not
* commit data to stable storage even though we
* requested it.
@@ -1392,7 +1412,7 @@ static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data,
dprintk("NFS: faulty NFS server %s:"
" (committed = %d) != (stable = %d)\n",
NFS_SERVER(inode)->nfs_client->cl_hostname,
- data->res.verf->committed, data->args.stable);
+ hdr->res.verf->committed, hdr->args.stable);
complain = jiffies + 300 * HZ;
}
}
@@ -1407,16 +1427,17 @@ static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data,
/*
* This function is called when the WRITE call is complete.
*/
-static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *data)
+static void nfs_writeback_result(struct rpc_task *task,
+ struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_args *argp = &data->args;
- struct nfs_pgio_res *resp = &data->res;
+ struct nfs_pgio_args *argp = &hdr->args;
+ struct nfs_pgio_res *resp = &hdr->res;
if (resp->count < argp->count) {
static unsigned long complain;
/* This a short write! */
- nfs_inc_stats(data->header->inode, NFSIOS_SHORTWRITE);
+ nfs_inc_stats(hdr->inode, NFSIOS_SHORTWRITE);
/* Has the server at least made some progress? */
if (resp->count == 0) {
@@ -1426,14 +1447,14 @@ static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *da
argp->count);
complain = jiffies + 300 * HZ;
}
- nfs_set_pgio_error(data->header, -EIO, argp->offset);
+ nfs_set_pgio_error(hdr, -EIO, argp->offset);
task->tk_status = -EIO;
return;
}
/* Was this an NFSv2 write or an NFSv3 stable write? */
if (resp->verf->committed != NFS_UNSTABLE) {
/* Resend from where the server left off */
- data->mds_offset += resp->count;
+ hdr->mds_offset += resp->count;
argp->offset += resp->count;
argp->pgbase += resp->count;
argp->count -= resp->count;
@@ -1703,7 +1724,7 @@ int nfs_commit_inode(struct inode *inode, int how)
return error;
if (!may_wait)
goto out_mark_dirty;
- error = wait_on_bit(&NFS_I(inode)->flags,
+ error = wait_on_bit_action(&NFS_I(inode)->flags,
NFS_INO_COMMIT,
nfs_wait_bit_killable,
TASK_KILLABLE);
@@ -1884,7 +1905,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
int __init nfs_init_writepagecache(void)
{
nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
- sizeof(struct nfs_rw_header),
+ sizeof(struct nfs_pgio_header),
0, SLAB_HWCACHE_ALIGN,
NULL);
if (nfs_wdata_cachep == NULL)
diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c
index ed628f71274c..538f142935ea 100644
--- a/fs/nfs_common/nfsacl.c
+++ b/fs/nfs_common/nfsacl.c
@@ -30,9 +30,6 @@
MODULE_LICENSE("GPL");
-EXPORT_SYMBOL_GPL(nfsacl_encode);
-EXPORT_SYMBOL_GPL(nfsacl_decode);
-
struct nfsacl_encode_desc {
struct xdr_array2_desc desc;
unsigned int count;
@@ -136,6 +133,7 @@ int nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
nfsacl_desc.desc.array_len;
return err;
}
+EXPORT_SYMBOL_GPL(nfsacl_encode);
struct nfsacl_decode_desc {
struct xdr_array2_desc desc;
@@ -295,3 +293,4 @@ int nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt,
return 8 + nfsacl_desc.desc.elem_size *
nfsacl_desc.desc.array_len;
}
+EXPORT_SYMBOL_GPL(nfsacl_decode);
diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h
index a986ceb6fd0d..4cd7c69a6cb9 100644
--- a/fs/nfsd/acl.h
+++ b/fs/nfsd/acl.h
@@ -47,7 +47,7 @@ struct svc_rqst;
#define NFS4_ACL_MAX ((PAGE_SIZE - sizeof(struct nfs4_acl)) \
/ sizeof(struct nfs4_ace))
-struct nfs4_acl *nfs4_acl_new(int);
+int nfs4_acl_bytes(int entries);
int nfs4_acl_get_whotype(char *, u32);
__be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who);
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 72f44823adbb..9d46a0bdd9f9 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -28,7 +28,7 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
validate_process_creds();
/* discard any old override before preparing the new set */
- revert_creds(get_cred(current->real_cred));
+ revert_creds(get_cred(current_real_cred()));
new = prepare_creds();
if (!new)
return -ENOMEM;
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 13b85f94d9e2..72ffd7cce3c3 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -698,8 +698,8 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem)
kref_get(&item->ex_client->ref);
new->ex_client = item->ex_client;
- new->ex_path.dentry = dget(item->ex_path.dentry);
- new->ex_path.mnt = mntget(item->ex_path.mnt);
+ new->ex_path = item->ex_path;
+ path_get(&item->ex_path);
new->ex_fslocs.locations = NULL;
new->ex_fslocs.locations_count = 0;
new->ex_fslocs.migrated = 0;
@@ -1253,7 +1253,7 @@ static int e_show(struct seq_file *m, void *p)
return 0;
}
- cache_get(&exp->h);
+ exp_get(exp);
if (cache_check(cd, &exp->h, NULL))
return 0;
exp_put(exp);
diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
index cfeea85c5bed..04dc8c167b0c 100644
--- a/fs/nfsd/export.h
+++ b/fs/nfsd/export.h
@@ -101,9 +101,10 @@ static inline void exp_put(struct svc_export *exp)
cache_put(&exp->h, exp->cd);
}
-static inline void exp_get(struct svc_export *exp)
+static inline struct svc_export *exp_get(struct svc_export *exp)
{
cache_get(&exp->h);
+ return exp;
}
struct svc_export * rqst_exp_find(struct svc_rqst *, int, u32 *);
diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
index 2ed05c3cd43d..c16bf5af6831 100644
--- a/fs/nfsd/fault_inject.c
+++ b/fs/nfsd/fault_inject.c
@@ -17,81 +17,13 @@
struct nfsd_fault_inject_op {
char *file;
- u64 (*forget)(struct nfs4_client *, u64);
- u64 (*print)(struct nfs4_client *, u64);
+ u64 (*get)(void);
+ u64 (*set_val)(u64);
+ u64 (*set_clnt)(struct sockaddr_storage *, size_t);
};
-static struct nfsd_fault_inject_op inject_ops[] = {
- {
- .file = "forget_clients",
- .forget = nfsd_forget_client,
- .print = nfsd_print_client,
- },
- {
- .file = "forget_locks",
- .forget = nfsd_forget_client_locks,
- .print = nfsd_print_client_locks,
- },
- {
- .file = "forget_openowners",
- .forget = nfsd_forget_client_openowners,
- .print = nfsd_print_client_openowners,
- },
- {
- .file = "forget_delegations",
- .forget = nfsd_forget_client_delegations,
- .print = nfsd_print_client_delegations,
- },
- {
- .file = "recall_delegations",
- .forget = nfsd_recall_client_delegations,
- .print = nfsd_print_client_delegations,
- },
-};
-
-static long int NUM_INJECT_OPS = sizeof(inject_ops) / sizeof(struct nfsd_fault_inject_op);
static struct dentry *debug_dir;
-static void nfsd_inject_set(struct nfsd_fault_inject_op *op, u64 val)
-{
- u64 count = 0;
-
- if (val == 0)
- printk(KERN_INFO "NFSD Fault Injection: %s (all)", op->file);
- else
- printk(KERN_INFO "NFSD Fault Injection: %s (n = %llu)", op->file, val);
-
- nfs4_lock_state();
- count = nfsd_for_n_state(val, op->forget);
- nfs4_unlock_state();
- printk(KERN_INFO "NFSD: %s: found %llu", op->file, count);
-}
-
-static void nfsd_inject_set_client(struct nfsd_fault_inject_op *op,
- struct sockaddr_storage *addr,
- size_t addr_size)
-{
- char buf[INET6_ADDRSTRLEN];
- struct nfs4_client *clp;
- u64 count;
-
- nfs4_lock_state();
- clp = nfsd_find_client(addr, addr_size);
- if (clp) {
- count = op->forget(clp, 0);
- rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf));
- printk(KERN_INFO "NFSD [%s]: Client %s had %llu state object(s)\n", op->file, buf, count);
- }
- nfs4_unlock_state();
-}
-
-static void nfsd_inject_get(struct nfsd_fault_inject_op *op, u64 *val)
-{
- nfs4_lock_state();
- *val = nfsd_for_n_state(0, op->print);
- nfs4_unlock_state();
-}
-
static ssize_t fault_inject_read(struct file *file, char __user *buf,
size_t len, loff_t *ppos)
{
@@ -99,9 +31,10 @@ static ssize_t fault_inject_read(struct file *file, char __user *buf,
char read_buf[25];
size_t size;
loff_t pos = *ppos;
+ struct nfsd_fault_inject_op *op = file_inode(file)->i_private;
if (!pos)
- nfsd_inject_get(file_inode(file)->i_private, &val);
+ val = op->get();
size = scnprintf(read_buf, sizeof(read_buf), "%llu\n", val);
return simple_read_from_buffer(buf, len, ppos, read_buf, size);
@@ -114,18 +47,36 @@ static ssize_t fault_inject_write(struct file *file, const char __user *buf,
size_t size = min(sizeof(write_buf) - 1, len);
struct net *net = current->nsproxy->net_ns;
struct sockaddr_storage sa;
+ struct nfsd_fault_inject_op *op = file_inode(file)->i_private;
u64 val;
+ char *nl;
if (copy_from_user(write_buf, buf, size))
return -EFAULT;
write_buf[size] = '\0';
+ /* Deal with any embedded newlines in the string */
+ nl = strchr(write_buf, '\n');
+ if (nl) {
+ size = nl - write_buf;
+ *nl = '\0';
+ }
+
size = rpc_pton(net, write_buf, size, (struct sockaddr *)&sa, sizeof(sa));
- if (size > 0)
- nfsd_inject_set_client(file_inode(file)->i_private, &sa, size);
- else {
+ if (size > 0) {
+ val = op->set_clnt(&sa, size);
+ if (val)
+ pr_info("NFSD [%s]: Client %s had %llu state object(s)\n",
+ op->file, write_buf, val);
+ } else {
val = simple_strtoll(write_buf, NULL, 0);
- nfsd_inject_set(file_inode(file)->i_private, val);
+ if (val == 0)
+ pr_info("NFSD Fault Injection: %s (all)", op->file);
+ else
+ pr_info("NFSD Fault Injection: %s (n = %llu)",
+ op->file, val);
+ val = op->set_val(val);
+ pr_info("NFSD: %s: found %llu", op->file, val);
}
return len; /* on success, claim we got the whole input */
}
@@ -141,6 +92,41 @@ void nfsd_fault_inject_cleanup(void)
debugfs_remove_recursive(debug_dir);
}
+static struct nfsd_fault_inject_op inject_ops[] = {
+ {
+ .file = "forget_clients",
+ .get = nfsd_inject_print_clients,
+ .set_val = nfsd_inject_forget_clients,
+ .set_clnt = nfsd_inject_forget_client,
+ },
+ {
+ .file = "forget_locks",
+ .get = nfsd_inject_print_locks,
+ .set_val = nfsd_inject_forget_locks,
+ .set_clnt = nfsd_inject_forget_client_locks,
+ },
+ {
+ .file = "forget_openowners",
+ .get = nfsd_inject_print_openowners,
+ .set_val = nfsd_inject_forget_openowners,
+ .set_clnt = nfsd_inject_forget_client_openowners,
+ },
+ {
+ .file = "forget_delegations",
+ .get = nfsd_inject_print_delegations,
+ .set_val = nfsd_inject_forget_delegations,
+ .set_clnt = nfsd_inject_forget_client_delegations,
+ },
+ {
+ .file = "recall_delegations",
+ .get = nfsd_inject_print_delegations,
+ .set_val = nfsd_inject_recall_delegations,
+ .set_clnt = nfsd_inject_recall_client_delegations,
+ },
+};
+
+#define NUM_INJECT_OPS (sizeof(inject_ops)/sizeof(struct nfsd_fault_inject_op))
+
int nfsd_fault_inject_init(void)
{
unsigned int i;
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index d32b3aa6600d..ea6749a32760 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -29,14 +29,19 @@
#define CLIENT_HASH_SIZE (1 << CLIENT_HASH_BITS)
#define CLIENT_HASH_MASK (CLIENT_HASH_SIZE - 1)
-#define LOCKOWNER_INO_HASH_BITS 8
-#define LOCKOWNER_INO_HASH_SIZE (1 << LOCKOWNER_INO_HASH_BITS)
-
#define SESSION_HASH_SIZE 512
struct cld_net;
struct nfsd4_client_tracking_ops;
+/*
+ * Represents a nfsd "container". With respect to nfsv4 state tracking, the
+ * fields of interest are the *_id_hashtbls and the *_name_tree. These track
+ * the nfs4_client objects by either short or long form clientid.
+ *
+ * Each nfsd_net runs a nfs4_laundromat workqueue job when necessary to clean
+ * up expired clients and delegations within the container.
+ */
struct nfsd_net {
struct cld_net *cld_net;
@@ -66,8 +71,6 @@ struct nfsd_net {
struct rb_root conf_name_tree;
struct list_head *unconf_id_hashtbl;
struct rb_root unconf_name_tree;
- struct list_head *ownerstr_hashtbl;
- struct list_head *lockowner_ino_hashtbl;
struct list_head *sessionid_hashtbl;
/*
* client_lru holds client queue ordered by nfs4_client.cl_time
@@ -97,10 +100,16 @@ struct nfsd_net {
bool nfsd_net_up;
bool lockd_up;
+ /* Time of server startup */
+ struct timeval nfssvc_boot;
+
/*
- * Time of server startup
+ * Max number of connections this nfsd container will allow. Defaults
+ * to '0' which is means that it bases this on the number of threads.
*/
- struct timeval nfssvc_boot;
+ unsigned int max_connections;
+
+ u32 clientid_counter;
struct svc_serv *nfsd_serv;
};
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 12b023a7ab7d..ac54ea60b3f6 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -54,14 +54,14 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp,
if (resp->mask & (NFS_ACL|NFS_ACLCNT)) {
acl = get_acl(inode, ACL_TYPE_ACCESS);
- if (IS_ERR(acl)) {
- nfserr = nfserrno(PTR_ERR(acl));
- goto fail;
- }
if (acl == NULL) {
/* Solaris returns the inode's minimum ACL. */
acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
}
+ if (IS_ERR(acl)) {
+ nfserr = nfserrno(PTR_ERR(acl));
+ goto fail;
+ }
resp->acl_access = acl;
}
if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) {
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 2a514e21dc74..34cbbab6abd7 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -47,14 +47,14 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp,
if (resp->mask & (NFS_ACL|NFS_ACLCNT)) {
acl = get_acl(inode, ACL_TYPE_ACCESS);
- if (IS_ERR(acl)) {
- nfserr = nfserrno(PTR_ERR(acl));
- goto fail;
- }
if (acl == NULL) {
/* Solaris returns the inode's minimum ACL. */
acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
}
+ if (IS_ERR(acl)) {
+ nfserr = nfserrno(PTR_ERR(acl));
+ goto fail;
+ }
resp->acl_access = acl;
}
if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) {
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 401289913130..fa2525b2e9d7 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -157,11 +157,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
* 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof)
* + 1 (xdr opaque byte count) = 26
*/
-
- resp->count = argp->count;
- if (max_blocksize < resp->count)
- resp->count = max_blocksize;
-
+ resp->count = min(argp->count, max_blocksize);
svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);
fh_copy(&resp->fh, &argp->fh);
@@ -286,8 +282,7 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp, struct nfsd3_symlinkargs *argp,
fh_copy(&resp->dirfh, &argp->ffh);
fh_init(&resp->fh, NFS3_FHSIZE);
nfserr = nfsd_symlink(rqstp, &resp->dirfh, argp->fname, argp->flen,
- argp->tname, argp->tlen,
- &resp->fh, &argp->attrs);
+ argp->tname, &resp->fh);
RETURN_STATUS(nfserr);
}
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index e6c01e80325e..39c5eb3ad33a 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -120,10 +120,7 @@ decode_sattr3(__be32 *p, struct iattr *iap)
iap->ia_valid |= ATTR_SIZE;
p = xdr_decode_hyper(p, &newsize);
- if (newsize <= NFS_OFFSET_MAX)
- iap->ia_size = newsize;
- else
- iap->ia_size = NFS_OFFSET_MAX;
+ iap->ia_size = min_t(u64, newsize, NFS_OFFSET_MAX);
}
if ((tmp = ntohl(*p++)) == 1) { /* set to server time */
iap->ia_valid |= ATTR_ATIME;
@@ -338,10 +335,8 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
return 0;
p = xdr_decode_hyper(p, &args->offset);
- len = args->count = ntohl(*p++);
-
- if (len > max_blocksize)
- len = max_blocksize;
+ args->count = ntohl(*p++);
+ len = min(args->count, max_blocksize);
/* set up the kvec */
v=0;
@@ -349,7 +344,7 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
struct page *p = *(rqstp->rq_next_page++);
rqstp->rq_vec[v].iov_base = page_address(p);
- rqstp->rq_vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE;
+ rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE);
len -= rqstp->rq_vec[v].iov_len;
v++;
}
@@ -484,9 +479,7 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
}
/* now copy next page if there is one */
if (len && !avail && rqstp->rq_arg.page_len) {
- avail = rqstp->rq_arg.page_len;
- if (avail > PAGE_SIZE)
- avail = PAGE_SIZE;
+ avail = min_t(unsigned int, rqstp->rq_arg.page_len, PAGE_SIZE);
old = page_address(rqstp->rq_arg.pages[0]);
}
while (len && avail && *old) {
@@ -571,10 +564,7 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
args->verf = p; p += 2;
args->dircount = ~0;
args->count = ntohl(*p++);
-
- if (args->count > PAGE_SIZE)
- args->count = PAGE_SIZE;
-
+ args->count = min_t(u32, args->count, PAGE_SIZE);
args->buffer = page_address(*(rqstp->rq_next_page++));
return xdr_argsize_check(rqstp, p);
@@ -595,10 +585,7 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
args->dircount = ntohl(*p++);
args->count = ntohl(*p++);
- len = (args->count > max_blocksize) ? max_blocksize :
- args->count;
- args->count = len;
-
+ len = args->count = min(args->count, max_blocksize);
while (len > 0) {
struct page *p = *(rqstp->rq_next_page++);
if (!args->buffer)
@@ -913,8 +900,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
*/
/* truncate filename if too long */
- if (namlen > NFS3_MAXNAMLEN)
- namlen = NFS3_MAXNAMLEN;
+ namlen = min(namlen, NFS3_MAXNAMLEN);
slen = XDR_QUADLEN(namlen);
elen = slen + NFS3_ENTRY_BAGGAGE
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index d714156a19fd..59fd76651781 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -146,35 +146,43 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry,
int size = 0;
pacl = get_acl(inode, ACL_TYPE_ACCESS);
- if (!pacl) {
+ if (!pacl)
pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
- if (IS_ERR(pacl))
- return PTR_ERR(pacl);
- }
+
+ if (IS_ERR(pacl))
+ return PTR_ERR(pacl);
+
/* allocate for worst case: one (deny, allow) pair each: */
size += 2 * pacl->a_count;
if (S_ISDIR(inode->i_mode)) {
flags = NFS4_ACL_DIR;
dpacl = get_acl(inode, ACL_TYPE_DEFAULT);
+ if (IS_ERR(dpacl)) {
+ error = PTR_ERR(dpacl);
+ goto rel_pacl;
+ }
+
if (dpacl)
size += 2 * dpacl->a_count;
}
- *acl = nfs4_acl_new(size);
+ *acl = kmalloc(nfs4_acl_bytes(size), GFP_KERNEL);
if (*acl == NULL) {
error = -ENOMEM;
goto out;
}
+ (*acl)->naces = 0;
_posix_to_nfsv4_one(pacl, *acl, flags & ~NFS4_ACL_TYPE_DEFAULT);
if (dpacl)
_posix_to_nfsv4_one(dpacl, *acl, flags | NFS4_ACL_TYPE_DEFAULT);
- out:
- posix_acl_release(pacl);
+out:
posix_acl_release(dpacl);
+rel_pacl:
+ posix_acl_release(pacl);
return error;
}
@@ -872,16 +880,13 @@ ace2type(struct nfs4_ace *ace)
return -1;
}
-struct nfs4_acl *
-nfs4_acl_new(int n)
+/*
+ * return the size of the struct nfs4_acl required to represent an acl
+ * with @entries entries.
+ */
+int nfs4_acl_bytes(int entries)
{
- struct nfs4_acl *acl;
-
- acl = kmalloc(sizeof(*acl) + n*sizeof(struct nfs4_ace), GFP_KERNEL);
- if (acl == NULL)
- return NULL;
- acl->naces = 0;
- return acl;
+ return sizeof(struct nfs4_acl) + entries * sizeof(struct nfs4_ace);
}
static struct {
@@ -935,5 +940,5 @@ __be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who)
return 0;
}
WARN_ON_ONCE(1);
- return -1;
+ return nfserr_serverfault;
}
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 2c73cae9899d..e0be57b0f79b 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -337,7 +337,7 @@ static void encode_cb_recall4args(struct xdr_stream *xdr,
p = xdr_reserve_space(xdr, 4);
*p++ = xdr_zero; /* truncate */
- encode_nfs_fh4(xdr, &dp->dl_fh);
+ encode_nfs_fh4(xdr, &dp->dl_stid.sc_file->fi_fhandle);
hdr->nops++;
}
@@ -678,7 +678,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
(clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5))
return -EINVAL;
args.client_name = clp->cl_cred.cr_principal;
- args.prognumber = conn->cb_prog,
+ args.prognumber = conn->cb_prog;
args.protocol = XPRT_TRANSPORT_TCP;
args.authflavor = clp->cl_cred.cr_flavor;
clp->cl_cb_ident = conn->cb_ident;
@@ -689,7 +689,8 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
clp->cl_cb_session = ses;
args.bc_xprt = conn->cb_xprt;
args.prognumber = clp->cl_cb_session->se_cb_prog;
- args.protocol = XPRT_TRANSPORT_BC_TCP;
+ args.protocol = conn->cb_xprt->xpt_class->xcl_ident |
+ XPRT_TRANSPORT_BC;
args.authflavor = ses->se_cb_sec.flavor;
}
/* Create RPC client */
@@ -904,7 +905,7 @@ static void nfsd4_cb_recall_release(void *calldata)
spin_lock(&clp->cl_lock);
list_del(&cb->cb_per_client);
spin_unlock(&clp->cl_lock);
- nfs4_put_delegation(dp);
+ nfs4_put_stid(&dp->dl_stid);
}
}
@@ -933,7 +934,7 @@ void nfsd4_shutdown_callback(struct nfs4_client *clp)
set_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags);
/*
* Note this won't actually result in a null callback;
- * instead, nfsd4_do_callback_rpc() will detect the killed
+ * instead, nfsd4_run_cb_null() will detect the killed
* client, destroy the rpc client, and stop:
*/
do_probe_callback(clp);
@@ -1011,9 +1012,9 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
run_nfsd4_cb(cb);
}
-static void nfsd4_do_callback_rpc(struct work_struct *w)
+static void
+nfsd4_run_callback_rpc(struct nfsd4_callback *cb)
{
- struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, cb_work);
struct nfs4_client *clp = cb->cb_clp;
struct rpc_clnt *clnt;
@@ -1031,9 +1032,22 @@ static void nfsd4_do_callback_rpc(struct work_struct *w)
cb->cb_ops, cb);
}
-void nfsd4_init_callback(struct nfsd4_callback *cb)
+void
+nfsd4_run_cb_null(struct work_struct *w)
{
- INIT_WORK(&cb->cb_work, nfsd4_do_callback_rpc);
+ struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback,
+ cb_work);
+ nfsd4_run_callback_rpc(cb);
+}
+
+void
+nfsd4_run_cb_recall(struct work_struct *w)
+{
+ struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback,
+ cb_work);
+
+ nfsd4_prepare_cb_recall(cb->cb_op);
+ nfsd4_run_callback_rpc(cb);
}
void nfsd4_cb_recall(struct nfs4_delegation *dp)
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 8f029db5d271..5e0dc528a0e8 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -177,7 +177,7 @@ fh_dup2(struct svc_fh *dst, struct svc_fh *src)
fh_put(dst);
dget(src->fh_dentry);
if (src->fh_export)
- cache_get(&src->fh_export->h);
+ exp_get(src->fh_export);
*dst = *src;
}
@@ -385,8 +385,6 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (nfsd4_has_session(cstate))
copy_clientid(&open->op_clientid, cstate->session);
- nfs4_lock_state();
-
/* check seqid for replay. set nfs4_owner */
resp = rqstp->rq_resp;
status = nfsd4_process_open1(&resp->cstate, open, nn);
@@ -431,8 +429,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
break;
case NFS4_OPEN_CLAIM_PREVIOUS:
status = nfs4_check_open_reclaim(&open->op_clientid,
- cstate->minorversion,
- nn);
+ cstate, nn);
if (status)
goto out;
open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
@@ -461,19 +458,17 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
* set, (2) sets open->op_stateid, (3) sets open->op_delegation.
*/
status = nfsd4_process_open2(rqstp, resfh, open);
- WARN_ON(status && open->op_created);
+ WARN(status && open->op_created,
+ "nfsd4_process_open2 failed to open newly-created file! status=%u\n",
+ be32_to_cpu(status));
out:
if (resfh && resfh != &cstate->current_fh) {
fh_dup2(&cstate->current_fh, resfh);
fh_put(resfh);
kfree(resfh);
}
- nfsd4_cleanup_open_state(open, status);
- if (open->op_openowner && !nfsd4_has_session(cstate))
- cstate->replay_owner = &open->op_openowner->oo_owner;
+ nfsd4_cleanup_open_state(cstate, open, status);
nfsd4_bump_seqid(cstate, status);
- if (!cstate->replay_owner)
- nfs4_unlock_state();
return status;
}
@@ -581,8 +576,12 @@ static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
__be32 verf[2];
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- verf[0] = (__be32)nn->nfssvc_boot.tv_sec;
- verf[1] = (__be32)nn->nfssvc_boot.tv_usec;
+ /*
+ * This is opaque to client, so no need to byte-swap. Use
+ * __force to keep sparse happy
+ */
+ verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec;
+ verf[1] = (__force __be32)nn->nfssvc_boot.tv_usec;
memcpy(verifier->data, verf, sizeof(verifier->data));
}
@@ -619,8 +618,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
case NF4LNK:
status = nfsd_symlink(rqstp, &cstate->current_fh,
create->cr_name, create->cr_namelen,
- create->cr_linkname, create->cr_linklen,
- &resfh, &create->cr_iattr);
+ create->cr_data, &resfh);
break;
case NF4BLK:
@@ -909,8 +907,8 @@ nfsd4_secinfo_no_name(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstat
default:
return nfserr_inval;
}
- exp_get(cstate->current_fh.fh_export);
- sin->sin_exp = cstate->current_fh.fh_export;
+
+ sin->sin_exp = exp_get(cstate->current_fh.fh_export);
fh_put(&cstate->current_fh);
return nfs_ok;
}
@@ -1289,7 +1287,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
* Don't use the deferral mechanism for NFSv4; compounds make it
* too hard to avoid non-idempotency problems.
*/
- rqstp->rq_usedeferral = 0;
+ rqstp->rq_usedeferral = false;
/*
* According to RFC3010, this takes precedence over all other errors.
@@ -1391,10 +1389,7 @@ encode_op:
args->ops, args->opcnt, resp->opcnt, op->opnum,
be32_to_cpu(status));
- if (cstate->replay_owner) {
- nfs4_unlock_state();
- cstate->replay_owner = NULL;
- }
+ nfsd4_cstate_clear_replay(cstate);
/* XXX Ugh, we need to get rid of this kind of special case: */
if (op->opnum == OP_READ && op->u.read.rd_filp)
fput(op->u.read.rd_filp);
@@ -1408,7 +1403,7 @@ encode_op:
BUG_ON(cstate->replay_owner);
out:
/* Reset deferral mechanism for RPC deferrals */
- rqstp->rq_usedeferral = 1;
+ rqstp->rq_usedeferral = true;
dprintk("nfsv4 compound returned %d\n", ntohl(status));
return status;
}
@@ -1520,21 +1515,17 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
u32 maxcount = 0, rlen = 0;
maxcount = svc_max_payload(rqstp);
- rlen = op->u.read.rd_length;
-
- if (rlen > maxcount)
- rlen = maxcount;
+ rlen = min(op->u.read.rd_length, maxcount);
return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32);
}
static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
- u32 maxcount = svc_max_payload(rqstp);
- u32 rlen = op->u.readdir.rd_maxcount;
+ u32 maxcount = 0, rlen = 0;
- if (rlen > maxcount)
- rlen = maxcount;
+ maxcount = svc_max_payload(rqstp);
+ rlen = min(op->u.readdir.rd_maxcount, maxcount);
return (op_encode_hdr_size + op_encode_verifier_maxsz +
XDR_QUADLEN(rlen)) * sizeof(__be32);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2204e1fe5725..2e80a59e7e91 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -70,13 +70,11 @@ static u64 current_sessionid = 1;
#define CURRENT_STATEID(stateid) (!memcmp((stateid), &currentstateid, sizeof(stateid_t)))
/* forward declarations */
-static int check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner);
+static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner);
+static void nfs4_free_ol_stateid(struct nfs4_stid *stid);
/* Locking: */
-/* Currently used for almost all code touching nfsv4 state: */
-static DEFINE_MUTEX(client_mutex);
-
/*
* Currently used for the del_recall_lru and file hash table. In an
* effort to decrease the scope of the client_mutex, this spinlock may
@@ -84,18 +82,18 @@ static DEFINE_MUTEX(client_mutex);
*/
static DEFINE_SPINLOCK(state_lock);
+/*
+ * A waitqueue for all in-progress 4.0 CLOSE operations that are waiting for
+ * the refcount on the open stateid to drop.
+ */
+static DECLARE_WAIT_QUEUE_HEAD(close_wq);
+
static struct kmem_cache *openowner_slab;
static struct kmem_cache *lockowner_slab;
static struct kmem_cache *file_slab;
static struct kmem_cache *stateid_slab;
static struct kmem_cache *deleg_slab;
-void
-nfs4_lock_state(void)
-{
- mutex_lock(&client_mutex);
-}
-
static void free_session(struct nfsd4_session *);
static bool is_session_dead(struct nfsd4_session *ses)
@@ -103,12 +101,6 @@ static bool is_session_dead(struct nfsd4_session *ses)
return ses->se_flags & NFS4_SESSION_DEAD;
}
-void nfsd4_put_session(struct nfsd4_session *ses)
-{
- if (atomic_dec_and_test(&ses->se_ref) && is_session_dead(ses))
- free_session(ses);
-}
-
static __be32 mark_session_dead_locked(struct nfsd4_session *ses, int ref_held_by_me)
{
if (atomic_read(&ses->se_ref) > ref_held_by_me)
@@ -117,46 +109,17 @@ static __be32 mark_session_dead_locked(struct nfsd4_session *ses, int ref_held_b
return nfs_ok;
}
-static __be32 nfsd4_get_session_locked(struct nfsd4_session *ses)
-{
- if (is_session_dead(ses))
- return nfserr_badsession;
- atomic_inc(&ses->se_ref);
- return nfs_ok;
-}
-
-void
-nfs4_unlock_state(void)
-{
- mutex_unlock(&client_mutex);
-}
-
static bool is_client_expired(struct nfs4_client *clp)
{
return clp->cl_time == 0;
}
-static __be32 mark_client_expired_locked(struct nfs4_client *clp)
-{
- if (atomic_read(&clp->cl_refcount))
- return nfserr_jukebox;
- clp->cl_time = 0;
- return nfs_ok;
-}
-
-static __be32 mark_client_expired(struct nfs4_client *clp)
+static __be32 get_client_locked(struct nfs4_client *clp)
{
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
- __be32 ret;
- spin_lock(&nn->client_lock);
- ret = mark_client_expired_locked(clp);
- spin_unlock(&nn->client_lock);
- return ret;
-}
+ lockdep_assert_held(&nn->client_lock);
-static __be32 get_client_locked(struct nfs4_client *clp)
-{
if (is_client_expired(clp))
return nfserr_expired;
atomic_inc(&clp->cl_refcount);
@@ -197,13 +160,17 @@ renew_client(struct nfs4_client *clp)
static void put_client_renew_locked(struct nfs4_client *clp)
{
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+ lockdep_assert_held(&nn->client_lock);
+
if (!atomic_dec_and_test(&clp->cl_refcount))
return;
if (!is_client_expired(clp))
renew_client_locked(clp);
}
-void put_client_renew(struct nfs4_client *clp)
+static void put_client_renew(struct nfs4_client *clp)
{
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
@@ -214,6 +181,79 @@ void put_client_renew(struct nfs4_client *clp)
spin_unlock(&nn->client_lock);
}
+static __be32 nfsd4_get_session_locked(struct nfsd4_session *ses)
+{
+ __be32 status;
+
+ if (is_session_dead(ses))
+ return nfserr_badsession;
+ status = get_client_locked(ses->se_client);
+ if (status)
+ return status;
+ atomic_inc(&ses->se_ref);
+ return nfs_ok;
+}
+
+static void nfsd4_put_session_locked(struct nfsd4_session *ses)
+{
+ struct nfs4_client *clp = ses->se_client;
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+ lockdep_assert_held(&nn->client_lock);
+
+ if (atomic_dec_and_test(&ses->se_ref) && is_session_dead(ses))
+ free_session(ses);
+ put_client_renew_locked(clp);
+}
+
+static void nfsd4_put_session(struct nfsd4_session *ses)
+{
+ struct nfs4_client *clp = ses->se_client;
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+ spin_lock(&nn->client_lock);
+ nfsd4_put_session_locked(ses);
+ spin_unlock(&nn->client_lock);
+}
+
+static int
+same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner)
+{
+ return (sop->so_owner.len == owner->len) &&
+ 0 == memcmp(sop->so_owner.data, owner->data, owner->len);
+}
+
+static struct nfs4_openowner *
+find_openstateowner_str_locked(unsigned int hashval, struct nfsd4_open *open,
+ struct nfs4_client *clp)
+{
+ struct nfs4_stateowner *so;
+
+ lockdep_assert_held(&clp->cl_lock);
+
+ list_for_each_entry(so, &clp->cl_ownerstr_hashtbl[hashval],
+ so_strhash) {
+ if (!so->so_is_open_owner)
+ continue;
+ if (same_owner_str(so, &open->op_owner)) {
+ atomic_inc(&so->so_count);
+ return openowner(so);
+ }
+ }
+ return NULL;
+}
+
+static struct nfs4_openowner *
+find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open,
+ struct nfs4_client *clp)
+{
+ struct nfs4_openowner *oo;
+
+ spin_lock(&clp->cl_lock);
+ oo = find_openstateowner_str_locked(hashval, open, clp);
+ spin_unlock(&clp->cl_lock);
+ return oo;
+}
static inline u32
opaque_hashval(const void *ptr, int nbytes)
@@ -236,10 +276,11 @@ static void nfsd4_free_file(struct nfs4_file *f)
static inline void
put_nfs4_file(struct nfs4_file *fi)
{
+ might_lock(&state_lock);
+
if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) {
hlist_del(&fi->fi_hash);
spin_unlock(&state_lock);
- iput(fi->fi_inode);
nfsd4_free_file(fi);
}
}
@@ -250,7 +291,80 @@ get_nfs4_file(struct nfs4_file *fi)
atomic_inc(&fi->fi_ref);
}
-static int num_delegations;
+static struct file *
+__nfs4_get_fd(struct nfs4_file *f, int oflag)
+{
+ if (f->fi_fds[oflag])
+ return get_file(f->fi_fds[oflag]);
+ return NULL;
+}
+
+static struct file *
+find_writeable_file_locked(struct nfs4_file *f)
+{
+ struct file *ret;
+
+ lockdep_assert_held(&f->fi_lock);
+
+ ret = __nfs4_get_fd(f, O_WRONLY);
+ if (!ret)
+ ret = __nfs4_get_fd(f, O_RDWR);
+ return ret;
+}
+
+static struct file *
+find_writeable_file(struct nfs4_file *f)
+{
+ struct file *ret;
+
+ spin_lock(&f->fi_lock);
+ ret = find_writeable_file_locked(f);
+ spin_unlock(&f->fi_lock);
+
+ return ret;
+}
+
+static struct file *find_readable_file_locked(struct nfs4_file *f)
+{
+ struct file *ret;
+
+ lockdep_assert_held(&f->fi_lock);
+
+ ret = __nfs4_get_fd(f, O_RDONLY);
+ if (!ret)
+ ret = __nfs4_get_fd(f, O_RDWR);
+ return ret;
+}
+
+static struct file *
+find_readable_file(struct nfs4_file *f)
+{
+ struct file *ret;
+
+ spin_lock(&f->fi_lock);
+ ret = find_readable_file_locked(f);
+ spin_unlock(&f->fi_lock);
+
+ return ret;
+}
+
+static struct file *
+find_any_file(struct nfs4_file *f)
+{
+ struct file *ret;
+
+ spin_lock(&f->fi_lock);
+ ret = __nfs4_get_fd(f, O_RDWR);
+ if (!ret) {
+ ret = __nfs4_get_fd(f, O_WRONLY);
+ if (!ret)
+ ret = __nfs4_get_fd(f, O_RDONLY);
+ }
+ spin_unlock(&f->fi_lock);
+ return ret;
+}
+
+static atomic_long_t num_delegations;
unsigned long max_delegations;
/*
@@ -262,12 +376,11 @@ unsigned long max_delegations;
#define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS)
#define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1)
-static unsigned int ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername)
+static unsigned int ownerstr_hashval(struct xdr_netobj *ownername)
{
unsigned int ret;
ret = opaque_hashval(ownername->data, ownername->len);
- ret += clientid;
return ret & OWNER_HASH_MASK;
}
@@ -275,75 +388,124 @@ static unsigned int ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername)
#define FILE_HASH_BITS 8
#define FILE_HASH_SIZE (1 << FILE_HASH_BITS)
-static unsigned int file_hashval(struct inode *ino)
+static unsigned int nfsd_fh_hashval(struct knfsd_fh *fh)
+{
+ return jhash2(fh->fh_base.fh_pad, XDR_QUADLEN(fh->fh_size), 0);
+}
+
+static unsigned int file_hashval(struct knfsd_fh *fh)
+{
+ return nfsd_fh_hashval(fh) & (FILE_HASH_SIZE - 1);
+}
+
+static bool nfsd_fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2)
{
- /* XXX: why are we hashing on inode pointer, anyway? */
- return hash_ptr(ino, FILE_HASH_BITS);
+ return fh1->fh_size == fh2->fh_size &&
+ !memcmp(fh1->fh_base.fh_pad,
+ fh2->fh_base.fh_pad,
+ fh1->fh_size);
}
static struct hlist_head file_hashtbl[FILE_HASH_SIZE];
-static void __nfs4_file_get_access(struct nfs4_file *fp, int oflag)
+static void
+__nfs4_file_get_access(struct nfs4_file *fp, u32 access)
{
- WARN_ON_ONCE(!(fp->fi_fds[oflag] || fp->fi_fds[O_RDWR]));
- atomic_inc(&fp->fi_access[oflag]);
+ lockdep_assert_held(&fp->fi_lock);
+
+ if (access & NFS4_SHARE_ACCESS_WRITE)
+ atomic_inc(&fp->fi_access[O_WRONLY]);
+ if (access & NFS4_SHARE_ACCESS_READ)
+ atomic_inc(&fp->fi_access[O_RDONLY]);
}
-static void nfs4_file_get_access(struct nfs4_file *fp, int oflag)
+static __be32
+nfs4_file_get_access(struct nfs4_file *fp, u32 access)
{
- if (oflag == O_RDWR) {
- __nfs4_file_get_access(fp, O_RDONLY);
- __nfs4_file_get_access(fp, O_WRONLY);
- } else
- __nfs4_file_get_access(fp, oflag);
+ lockdep_assert_held(&fp->fi_lock);
+
+ /* Does this access mode make sense? */
+ if (access & ~NFS4_SHARE_ACCESS_BOTH)
+ return nfserr_inval;
+
+ /* Does it conflict with a deny mode already set? */
+ if ((access & fp->fi_share_deny) != 0)
+ return nfserr_share_denied;
+
+ __nfs4_file_get_access(fp, access);
+ return nfs_ok;
}
-static void nfs4_file_put_fd(struct nfs4_file *fp, int oflag)
+static __be32 nfs4_file_check_deny(struct nfs4_file *fp, u32 deny)
{
- if (fp->fi_fds[oflag]) {
- fput(fp->fi_fds[oflag]);
- fp->fi_fds[oflag] = NULL;
+ /* Common case is that there is no deny mode. */
+ if (deny) {
+ /* Does this deny mode make sense? */
+ if (deny & ~NFS4_SHARE_DENY_BOTH)
+ return nfserr_inval;
+
+ if ((deny & NFS4_SHARE_DENY_READ) &&
+ atomic_read(&fp->fi_access[O_RDONLY]))
+ return nfserr_share_denied;
+
+ if ((deny & NFS4_SHARE_DENY_WRITE) &&
+ atomic_read(&fp->fi_access[O_WRONLY]))
+ return nfserr_share_denied;
}
+ return nfs_ok;
}
static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag)
{
- if (atomic_dec_and_test(&fp->fi_access[oflag])) {
- nfs4_file_put_fd(fp, oflag);
+ might_lock(&fp->fi_lock);
+
+ if (atomic_dec_and_lock(&fp->fi_access[oflag], &fp->fi_lock)) {
+ struct file *f1 = NULL;
+ struct file *f2 = NULL;
+
+ swap(f1, fp->fi_fds[oflag]);
if (atomic_read(&fp->fi_access[1 - oflag]) == 0)
- nfs4_file_put_fd(fp, O_RDWR);
+ swap(f2, fp->fi_fds[O_RDWR]);
+ spin_unlock(&fp->fi_lock);
+ if (f1)
+ fput(f1);
+ if (f2)
+ fput(f2);
}
}
-static void nfs4_file_put_access(struct nfs4_file *fp, int oflag)
+static void nfs4_file_put_access(struct nfs4_file *fp, u32 access)
{
- if (oflag == O_RDWR) {
- __nfs4_file_put_access(fp, O_RDONLY);
+ WARN_ON_ONCE(access & ~NFS4_SHARE_ACCESS_BOTH);
+
+ if (access & NFS4_SHARE_ACCESS_WRITE)
__nfs4_file_put_access(fp, O_WRONLY);
- } else
- __nfs4_file_put_access(fp, oflag);
+ if (access & NFS4_SHARE_ACCESS_READ)
+ __nfs4_file_put_access(fp, O_RDONLY);
}
-static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct
-kmem_cache *slab)
+static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl,
+ struct kmem_cache *slab)
{
- struct idr *stateids = &cl->cl_stateids;
struct nfs4_stid *stid;
int new_id;
- stid = kmem_cache_alloc(slab, GFP_KERNEL);
+ stid = kmem_cache_zalloc(slab, GFP_KERNEL);
if (!stid)
return NULL;
- new_id = idr_alloc_cyclic(stateids, stid, 0, 0, GFP_KERNEL);
+ idr_preload(GFP_KERNEL);
+ spin_lock(&cl->cl_lock);
+ new_id = idr_alloc_cyclic(&cl->cl_stateids, stid, 0, 0, GFP_NOWAIT);
+ spin_unlock(&cl->cl_lock);
+ idr_preload_end();
if (new_id < 0)
goto out_free;
stid->sc_client = cl;
- stid->sc_type = 0;
stid->sc_stateid.si_opaque.so_id = new_id;
stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid;
/* Will be incremented before return to client: */
- stid->sc_stateid.si_generation = 0;
+ atomic_set(&stid->sc_count, 1);
/*
* It shouldn't be a problem to reuse an opaque stateid value.
@@ -360,9 +522,24 @@ out_free:
return NULL;
}
-static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp)
+static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp)
{
- return openlockstateid(nfs4_alloc_stid(clp, stateid_slab));
+ struct nfs4_stid *stid;
+ struct nfs4_ol_stateid *stp;
+
+ stid = nfs4_alloc_stid(clp, stateid_slab);
+ if (!stid)
+ return NULL;
+
+ stp = openlockstateid(stid);
+ stp->st_stid.sc_free = nfs4_free_ol_stateid;
+ return stp;
+}
+
+static void nfs4_free_deleg(struct nfs4_stid *stid)
+{
+ kmem_cache_free(deleg_slab, stid);
+ atomic_long_dec(&num_delegations);
}
/*
@@ -379,10 +556,11 @@ static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp)
* Each filter is 256 bits. We hash the filehandle to 32bit and use the
* low 3 bytes as hash-table indices.
*
- * 'state_lock', which is always held when block_delegations() is called,
+ * 'blocked_delegations_lock', which is always taken in block_delegations(),
* is used to manage concurrent access. Testing does not need the lock
* except when swapping the two filters.
*/
+static DEFINE_SPINLOCK(blocked_delegations_lock);
static struct bloom_pair {
int entries, old_entries;
time_t swap_time;
@@ -398,7 +576,7 @@ static int delegation_blocked(struct knfsd_fh *fh)
if (bd->entries == 0)
return 0;
if (seconds_since_boot() - bd->swap_time > 30) {
- spin_lock(&state_lock);
+ spin_lock(&blocked_delegations_lock);
if (seconds_since_boot() - bd->swap_time > 30) {
bd->entries -= bd->old_entries;
bd->old_entries = bd->entries;
@@ -407,7 +585,7 @@ static int delegation_blocked(struct knfsd_fh *fh)
bd->new = 1-bd->new;
bd->swap_time = seconds_since_boot();
}
- spin_unlock(&state_lock);
+ spin_unlock(&blocked_delegations_lock);
}
hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0);
if (test_bit(hash&255, bd->set[0]) &&
@@ -430,69 +608,73 @@ static void block_delegations(struct knfsd_fh *fh)
hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0);
+ spin_lock(&blocked_delegations_lock);
__set_bit(hash&255, bd->set[bd->new]);
__set_bit((hash>>8)&255, bd->set[bd->new]);
__set_bit((hash>>16)&255, bd->set[bd->new]);
if (bd->entries == 0)
bd->swap_time = seconds_since_boot();
bd->entries += 1;
+ spin_unlock(&blocked_delegations_lock);
}
static struct nfs4_delegation *
-alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh)
+alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh)
{
struct nfs4_delegation *dp;
+ long n;
dprintk("NFSD alloc_init_deleg\n");
- if (num_delegations > max_delegations)
- return NULL;
+ n = atomic_long_inc_return(&num_delegations);
+ if (n < 0 || n > max_delegations)
+ goto out_dec;
if (delegation_blocked(&current_fh->fh_handle))
- return NULL;
+ goto out_dec;
dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab));
if (dp == NULL)
- return dp;
+ goto out_dec;
+
+ dp->dl_stid.sc_free = nfs4_free_deleg;
/*
* delegation seqid's are never incremented. The 4.1 special
* meaning of seqid 0 isn't meaningful, really, but let's avoid
* 0 anyway just for consistency and use 1:
*/
dp->dl_stid.sc_stateid.si_generation = 1;
- num_delegations++;
INIT_LIST_HEAD(&dp->dl_perfile);
INIT_LIST_HEAD(&dp->dl_perclnt);
INIT_LIST_HEAD(&dp->dl_recall_lru);
- dp->dl_file = NULL;
dp->dl_type = NFS4_OPEN_DELEGATE_READ;
- fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle);
- dp->dl_time = 0;
- atomic_set(&dp->dl_count, 1);
- nfsd4_init_callback(&dp->dl_recall);
+ INIT_WORK(&dp->dl_recall.cb_work, nfsd4_run_cb_recall);
return dp;
+out_dec:
+ atomic_long_dec(&num_delegations);
+ return NULL;
}
-static void remove_stid(struct nfs4_stid *s)
+void
+nfs4_put_stid(struct nfs4_stid *s)
{
- struct idr *stateids = &s->sc_client->cl_stateids;
+ struct nfs4_file *fp = s->sc_file;
+ struct nfs4_client *clp = s->sc_client;
- idr_remove(stateids, s->sc_stateid.si_opaque.so_id);
-}
+ might_lock(&clp->cl_lock);
-static void nfs4_free_stid(struct kmem_cache *slab, struct nfs4_stid *s)
-{
- kmem_cache_free(slab, s);
-}
-
-void
-nfs4_put_delegation(struct nfs4_delegation *dp)
-{
- if (atomic_dec_and_test(&dp->dl_count)) {
- nfs4_free_stid(deleg_slab, &dp->dl_stid);
- num_delegations--;
+ if (!atomic_dec_and_lock(&s->sc_count, &clp->cl_lock)) {
+ wake_up_all(&close_wq);
+ return;
}
+ idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
+ spin_unlock(&clp->cl_lock);
+ s->sc_free(s);
+ if (fp)
+ put_nfs4_file(fp);
}
static void nfs4_put_deleg_lease(struct nfs4_file *fp)
{
+ lockdep_assert_held(&state_lock);
+
if (!fp->fi_lease)
return;
if (atomic_dec_and_test(&fp->fi_delegees)) {
@@ -512,54 +694,54 @@ static void
hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
{
lockdep_assert_held(&state_lock);
+ lockdep_assert_held(&fp->fi_lock);
+ atomic_inc(&dp->dl_stid.sc_count);
dp->dl_stid.sc_type = NFS4_DELEG_STID;
list_add(&dp->dl_perfile, &fp->fi_delegations);
list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations);
}
-/* Called under the state lock. */
static void
-unhash_delegation(struct nfs4_delegation *dp)
+unhash_delegation_locked(struct nfs4_delegation *dp)
{
- spin_lock(&state_lock);
- list_del_init(&dp->dl_perclnt);
- list_del_init(&dp->dl_perfile);
- list_del_init(&dp->dl_recall_lru);
- spin_unlock(&state_lock);
- if (dp->dl_file) {
- nfs4_put_deleg_lease(dp->dl_file);
- put_nfs4_file(dp->dl_file);
- dp->dl_file = NULL;
- }
-}
-
+ struct nfs4_file *fp = dp->dl_stid.sc_file;
+ lockdep_assert_held(&state_lock);
-static void destroy_revoked_delegation(struct nfs4_delegation *dp)
-{
+ dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID;
+ /* Ensure that deleg break won't try to requeue it */
+ ++dp->dl_time;
+ spin_lock(&fp->fi_lock);
+ list_del_init(&dp->dl_perclnt);
list_del_init(&dp->dl_recall_lru);
- remove_stid(&dp->dl_stid);
- nfs4_put_delegation(dp);
+ list_del_init(&dp->dl_perfile);
+ spin_unlock(&fp->fi_lock);
+ if (fp)
+ nfs4_put_deleg_lease(fp);
}
static void destroy_delegation(struct nfs4_delegation *dp)
{
- unhash_delegation(dp);
- remove_stid(&dp->dl_stid);
- nfs4_put_delegation(dp);
+ spin_lock(&state_lock);
+ unhash_delegation_locked(dp);
+ spin_unlock(&state_lock);
+ nfs4_put_stid(&dp->dl_stid);
}
static void revoke_delegation(struct nfs4_delegation *dp)
{
struct nfs4_client *clp = dp->dl_stid.sc_client;
+ WARN_ON(!list_empty(&dp->dl_recall_lru));
+
if (clp->cl_minorversion == 0)
- destroy_delegation(dp);
+ nfs4_put_stid(&dp->dl_stid);
else {
- unhash_delegation(dp);
dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID;
+ spin_lock(&clp->cl_lock);
list_add(&dp->dl_recall_lru, &clp->cl_revoked);
+ spin_unlock(&clp->cl_lock);
}
}
@@ -607,57 +789,62 @@ bmap_to_share_mode(unsigned long bmap) {
return access;
}
-static bool
-test_share(struct nfs4_ol_stateid *stp, struct nfsd4_open *open) {
- unsigned int access, deny;
-
- access = bmap_to_share_mode(stp->st_access_bmap);
- deny = bmap_to_share_mode(stp->st_deny_bmap);
- if ((access & open->op_share_deny) || (deny & open->op_share_access))
- return false;
- return true;
-}
-
/* set share access for a given stateid */
static inline void
set_access(u32 access, struct nfs4_ol_stateid *stp)
{
- __set_bit(access, &stp->st_access_bmap);
+ unsigned char mask = 1 << access;
+
+ WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH);
+ stp->st_access_bmap |= mask;
}
/* clear share access for a given stateid */
static inline void
clear_access(u32 access, struct nfs4_ol_stateid *stp)
{
- __clear_bit(access, &stp->st_access_bmap);
+ unsigned char mask = 1 << access;
+
+ WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH);
+ stp->st_access_bmap &= ~mask;
}
/* test whether a given stateid has access */
static inline bool
test_access(u32 access, struct nfs4_ol_stateid *stp)
{
- return test_bit(access, &stp->st_access_bmap);
+ unsigned char mask = 1 << access;
+
+ return (bool)(stp->st_access_bmap & mask);
}
/* set share deny for a given stateid */
static inline void
-set_deny(u32 access, struct nfs4_ol_stateid *stp)
+set_deny(u32 deny, struct nfs4_ol_stateid *stp)
{
- __set_bit(access, &stp->st_deny_bmap);
+ unsigned char mask = 1 << deny;
+
+ WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH);
+ stp->st_deny_bmap |= mask;
}
/* clear share deny for a given stateid */
static inline void
-clear_deny(u32 access, struct nfs4_ol_stateid *stp)
+clear_deny(u32 deny, struct nfs4_ol_stateid *stp)
{
- __clear_bit(access, &stp->st_deny_bmap);
+ unsigned char mask = 1 << deny;
+
+ WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH);
+ stp->st_deny_bmap &= ~mask;
}
/* test whether a given stateid is denying specific access */
static inline bool
-test_deny(u32 access, struct nfs4_ol_stateid *stp)
+test_deny(u32 deny, struct nfs4_ol_stateid *stp)
{
- return test_bit(access, &stp->st_deny_bmap);
+ unsigned char mask = 1 << deny;
+
+ return (bool)(stp->st_deny_bmap & mask);
}
static int nfs4_access_to_omode(u32 access)
@@ -674,138 +861,283 @@ static int nfs4_access_to_omode(u32 access)
return O_RDONLY;
}
+/*
+ * A stateid that had a deny mode associated with it is being released
+ * or downgraded. Recalculate the deny mode on the file.
+ */
+static void
+recalculate_deny_mode(struct nfs4_file *fp)
+{
+ struct nfs4_ol_stateid *stp;
+
+ spin_lock(&fp->fi_lock);
+ fp->fi_share_deny = 0;
+ list_for_each_entry(stp, &fp->fi_stateids, st_perfile)
+ fp->fi_share_deny |= bmap_to_share_mode(stp->st_deny_bmap);
+ spin_unlock(&fp->fi_lock);
+}
+
+static void
+reset_union_bmap_deny(u32 deny, struct nfs4_ol_stateid *stp)
+{
+ int i;
+ bool change = false;
+
+ for (i = 1; i < 4; i++) {
+ if ((i & deny) != i) {
+ change = true;
+ clear_deny(i, stp);
+ }
+ }
+
+ /* Recalculate per-file deny mode if there was a change */
+ if (change)
+ recalculate_deny_mode(stp->st_stid.sc_file);
+}
+
/* release all access and file references for a given stateid */
static void
release_all_access(struct nfs4_ol_stateid *stp)
{
int i;
+ struct nfs4_file *fp = stp->st_stid.sc_file;
+
+ if (fp && stp->st_deny_bmap != 0)
+ recalculate_deny_mode(fp);
for (i = 1; i < 4; i++) {
if (test_access(i, stp))
- nfs4_file_put_access(stp->st_file,
- nfs4_access_to_omode(i));
+ nfs4_file_put_access(stp->st_stid.sc_file, i);
clear_access(i, stp);
}
}
-static void unhash_generic_stateid(struct nfs4_ol_stateid *stp)
+static void nfs4_put_stateowner(struct nfs4_stateowner *sop)
{
+ struct nfs4_client *clp = sop->so_client;
+
+ might_lock(&clp->cl_lock);
+
+ if (!atomic_dec_and_lock(&sop->so_count, &clp->cl_lock))
+ return;
+ sop->so_ops->so_unhash(sop);
+ spin_unlock(&clp->cl_lock);
+ kfree(sop->so_owner.data);
+ sop->so_ops->so_free(sop);
+}
+
+static void unhash_ol_stateid(struct nfs4_ol_stateid *stp)
+{
+ struct nfs4_file *fp = stp->st_stid.sc_file;
+
+ lockdep_assert_held(&stp->st_stateowner->so_client->cl_lock);
+
+ spin_lock(&fp->fi_lock);
list_del(&stp->st_perfile);
+ spin_unlock(&fp->fi_lock);
list_del(&stp->st_perstateowner);
}
-static void close_generic_stateid(struct nfs4_ol_stateid *stp)
+static void nfs4_free_ol_stateid(struct nfs4_stid *stid)
{
+ struct nfs4_ol_stateid *stp = openlockstateid(stid);
+
release_all_access(stp);
- put_nfs4_file(stp->st_file);
- stp->st_file = NULL;
+ if (stp->st_stateowner)
+ nfs4_put_stateowner(stp->st_stateowner);
+ kmem_cache_free(stateid_slab, stid);
}
-static void free_generic_stateid(struct nfs4_ol_stateid *stp)
+static void nfs4_free_lock_stateid(struct nfs4_stid *stid)
{
- remove_stid(&stp->st_stid);
- nfs4_free_stid(stateid_slab, &stp->st_stid);
+ struct nfs4_ol_stateid *stp = openlockstateid(stid);
+ struct nfs4_lockowner *lo = lockowner(stp->st_stateowner);
+ struct file *file;
+
+ file = find_any_file(stp->st_stid.sc_file);
+ if (file)
+ filp_close(file, (fl_owner_t)lo);
+ nfs4_free_ol_stateid(stid);
}
-static void release_lock_stateid(struct nfs4_ol_stateid *stp)
+/*
+ * Put the persistent reference to an already unhashed generic stateid, while
+ * holding the cl_lock. If it's the last reference, then put it onto the
+ * reaplist for later destruction.
+ */
+static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp,
+ struct list_head *reaplist)
{
- struct file *file;
+ struct nfs4_stid *s = &stp->st_stid;
+ struct nfs4_client *clp = s->sc_client;
+
+ lockdep_assert_held(&clp->cl_lock);
- unhash_generic_stateid(stp);
+ WARN_ON_ONCE(!list_empty(&stp->st_locks));
+
+ if (!atomic_dec_and_test(&s->sc_count)) {
+ wake_up_all(&close_wq);
+ return;
+ }
+
+ idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
+ list_add(&stp->st_locks, reaplist);
+}
+
+static void unhash_lock_stateid(struct nfs4_ol_stateid *stp)
+{
+ struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner);
+
+ lockdep_assert_held(&oo->oo_owner.so_client->cl_lock);
+
+ list_del_init(&stp->st_locks);
+ unhash_ol_stateid(stp);
unhash_stid(&stp->st_stid);
- file = find_any_file(stp->st_file);
- if (file)
- locks_remove_posix(file, (fl_owner_t)lockowner(stp->st_stateowner));
- close_generic_stateid(stp);
- free_generic_stateid(stp);
}
-static void unhash_lockowner(struct nfs4_lockowner *lo)
+static void release_lock_stateid(struct nfs4_ol_stateid *stp)
{
- struct nfs4_ol_stateid *stp;
+ struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner);
- list_del(&lo->lo_owner.so_strhash);
- list_del(&lo->lo_perstateid);
- list_del(&lo->lo_owner_ino_hash);
- while (!list_empty(&lo->lo_owner.so_stateids)) {
- stp = list_first_entry(&lo->lo_owner.so_stateids,
- struct nfs4_ol_stateid, st_perstateowner);
- release_lock_stateid(stp);
- }
+ spin_lock(&oo->oo_owner.so_client->cl_lock);
+ unhash_lock_stateid(stp);
+ spin_unlock(&oo->oo_owner.so_client->cl_lock);
+ nfs4_put_stid(&stp->st_stid);
}
-static void nfs4_free_lockowner(struct nfs4_lockowner *lo)
+static void unhash_lockowner_locked(struct nfs4_lockowner *lo)
{
- kfree(lo->lo_owner.so_owner.data);
- kmem_cache_free(lockowner_slab, lo);
+ struct nfs4_client *clp = lo->lo_owner.so_client;
+
+ lockdep_assert_held(&clp->cl_lock);
+
+ list_del_init(&lo->lo_owner.so_strhash);
+}
+
+/*
+ * Free a list of generic stateids that were collected earlier after being
+ * fully unhashed.
+ */
+static void
+free_ol_stateid_reaplist(struct list_head *reaplist)
+{
+ struct nfs4_ol_stateid *stp;
+ struct nfs4_file *fp;
+
+ might_sleep();
+
+ while (!list_empty(reaplist)) {
+ stp = list_first_entry(reaplist, struct nfs4_ol_stateid,
+ st_locks);
+ list_del(&stp->st_locks);
+ fp = stp->st_stid.sc_file;
+ stp->st_stid.sc_free(&stp->st_stid);
+ if (fp)
+ put_nfs4_file(fp);
+ }
}
static void release_lockowner(struct nfs4_lockowner *lo)
{
- unhash_lockowner(lo);
- nfs4_free_lockowner(lo);
+ struct nfs4_client *clp = lo->lo_owner.so_client;
+ struct nfs4_ol_stateid *stp;
+ struct list_head reaplist;
+
+ INIT_LIST_HEAD(&reaplist);
+
+ spin_lock(&clp->cl_lock);
+ unhash_lockowner_locked(lo);
+ while (!list_empty(&lo->lo_owner.so_stateids)) {
+ stp = list_first_entry(&lo->lo_owner.so_stateids,
+ struct nfs4_ol_stateid, st_perstateowner);
+ unhash_lock_stateid(stp);
+ put_ol_stateid_locked(stp, &reaplist);
+ }
+ spin_unlock(&clp->cl_lock);
+ free_ol_stateid_reaplist(&reaplist);
+ nfs4_put_stateowner(&lo->lo_owner);
}
-static void
-release_stateid_lockowners(struct nfs4_ol_stateid *open_stp)
+static void release_open_stateid_locks(struct nfs4_ol_stateid *open_stp,
+ struct list_head *reaplist)
{
- struct nfs4_lockowner *lo;
+ struct nfs4_ol_stateid *stp;
- while (!list_empty(&open_stp->st_lockowners)) {
- lo = list_entry(open_stp->st_lockowners.next,
- struct nfs4_lockowner, lo_perstateid);
- release_lockowner(lo);
+ while (!list_empty(&open_stp->st_locks)) {
+ stp = list_entry(open_stp->st_locks.next,
+ struct nfs4_ol_stateid, st_locks);
+ unhash_lock_stateid(stp);
+ put_ol_stateid_locked(stp, reaplist);
}
}
-static void unhash_open_stateid(struct nfs4_ol_stateid *stp)
+static void unhash_open_stateid(struct nfs4_ol_stateid *stp,
+ struct list_head *reaplist)
{
- unhash_generic_stateid(stp);
- release_stateid_lockowners(stp);
- close_generic_stateid(stp);
+ lockdep_assert_held(&stp->st_stid.sc_client->cl_lock);
+
+ unhash_ol_stateid(stp);
+ release_open_stateid_locks(stp, reaplist);
}
static void release_open_stateid(struct nfs4_ol_stateid *stp)
{
- unhash_open_stateid(stp);
- free_generic_stateid(stp);
+ LIST_HEAD(reaplist);
+
+ spin_lock(&stp->st_stid.sc_client->cl_lock);
+ unhash_open_stateid(stp, &reaplist);
+ put_ol_stateid_locked(stp, &reaplist);
+ spin_unlock(&stp->st_stid.sc_client->cl_lock);
+ free_ol_stateid_reaplist(&reaplist);
}
-static void unhash_openowner(struct nfs4_openowner *oo)
+static void unhash_openowner_locked(struct nfs4_openowner *oo)
{
- struct nfs4_ol_stateid *stp;
+ struct nfs4_client *clp = oo->oo_owner.so_client;
- list_del(&oo->oo_owner.so_strhash);
- list_del(&oo->oo_perclient);
- while (!list_empty(&oo->oo_owner.so_stateids)) {
- stp = list_first_entry(&oo->oo_owner.so_stateids,
- struct nfs4_ol_stateid, st_perstateowner);
- release_open_stateid(stp);
- }
+ lockdep_assert_held(&clp->cl_lock);
+
+ list_del_init(&oo->oo_owner.so_strhash);
+ list_del_init(&oo->oo_perclient);
}
static void release_last_closed_stateid(struct nfs4_openowner *oo)
{
- struct nfs4_ol_stateid *s = oo->oo_last_closed_stid;
+ struct nfsd_net *nn = net_generic(oo->oo_owner.so_client->net,
+ nfsd_net_id);
+ struct nfs4_ol_stateid *s;
+ spin_lock(&nn->client_lock);
+ s = oo->oo_last_closed_stid;
if (s) {
- free_generic_stateid(s);
+ list_del_init(&oo->oo_close_lru);
oo->oo_last_closed_stid = NULL;
}
-}
-
-static void nfs4_free_openowner(struct nfs4_openowner *oo)
-{
- kfree(oo->oo_owner.so_owner.data);
- kmem_cache_free(openowner_slab, oo);
+ spin_unlock(&nn->client_lock);
+ if (s)
+ nfs4_put_stid(&s->st_stid);
}
static void release_openowner(struct nfs4_openowner *oo)
{
- unhash_openowner(oo);
- list_del(&oo->oo_close_lru);
+ struct nfs4_ol_stateid *stp;
+ struct nfs4_client *clp = oo->oo_owner.so_client;
+ struct list_head reaplist;
+
+ INIT_LIST_HEAD(&reaplist);
+
+ spin_lock(&clp->cl_lock);
+ unhash_openowner_locked(oo);
+ while (!list_empty(&oo->oo_owner.so_stateids)) {
+ stp = list_first_entry(&oo->oo_owner.so_stateids,
+ struct nfs4_ol_stateid, st_perstateowner);
+ unhash_open_stateid(stp, &reaplist);
+ put_ol_stateid_locked(stp, &reaplist);
+ }
+ spin_unlock(&clp->cl_lock);
+ free_ol_stateid_reaplist(&reaplist);
release_last_closed_stateid(oo);
- nfs4_free_openowner(oo);
+ nfs4_put_stateowner(&oo->oo_owner);
}
static inline int
@@ -842,7 +1174,7 @@ void nfsd4_bump_seqid(struct nfsd4_compound_state *cstate, __be32 nfserr)
return;
if (!seqid_mutating_err(ntohl(nfserr))) {
- cstate->replay_owner = NULL;
+ nfsd4_cstate_clear_replay(cstate);
return;
}
if (!so)
@@ -1030,10 +1362,8 @@ static void nfsd4_init_conn(struct svc_rqst *rqstp, struct nfsd4_conn *conn, str
if (ret)
/* oops; xprt is already down: */
nfsd4_conn_lost(&conn->cn_xpt_user);
- if (conn->cn_flags & NFS4_CDFC4_BACK) {
- /* callback channel may be back up */
- nfsd4_probe_callback(ses->se_client);
- }
+ /* We may have gained or lost a callback channel: */
+ nfsd4_probe_callback_sync(ses->se_client);
}
static struct nfsd4_conn *alloc_conn_from_crses(struct svc_rqst *rqstp, struct nfsd4_create_session *cses)
@@ -1073,9 +1403,6 @@ static void __free_session(struct nfsd4_session *ses)
static void free_session(struct nfsd4_session *ses)
{
- struct nfsd_net *nn = net_generic(ses->se_client->net, nfsd_net_id);
-
- lockdep_assert_held(&nn->client_lock);
nfsd4_del_conns(ses);
nfsd4_put_drc_mem(&ses->se_fchannel);
__free_session(ses);
@@ -1097,12 +1424,10 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru
new->se_cb_sec = cses->cb_sec;
atomic_set(&new->se_ref, 0);
idx = hash_sessionid(&new->se_sessionid);
- spin_lock(&nn->client_lock);
list_add(&new->se_hash, &nn->sessionid_hashtbl[idx]);
spin_lock(&clp->cl_lock);
list_add(&new->se_perclnt, &clp->cl_sessions);
spin_unlock(&clp->cl_lock);
- spin_unlock(&nn->client_lock);
if (cses->flags & SESSION4_BACK_CHAN) {
struct sockaddr *sa = svc_addr(rqstp);
@@ -1120,12 +1445,14 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru
/* caller must hold client_lock */
static struct nfsd4_session *
-find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net)
+__find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net)
{
struct nfsd4_session *elem;
int idx;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ lockdep_assert_held(&nn->client_lock);
+
dump_sessionid(__func__, sessionid);
idx = hash_sessionid(sessionid);
/* Search in the appropriate list */
@@ -1140,10 +1467,33 @@ find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net)
return NULL;
}
+static struct nfsd4_session *
+find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net,
+ __be32 *ret)
+{
+ struct nfsd4_session *session;
+ __be32 status = nfserr_badsession;
+
+ session = __find_in_sessionid_hashtbl(sessionid, net);
+ if (!session)
+ goto out;
+ status = nfsd4_get_session_locked(session);
+ if (status)
+ session = NULL;
+out:
+ *ret = status;
+ return session;
+}
+
/* caller must hold client_lock */
static void
unhash_session(struct nfsd4_session *ses)
{
+ struct nfs4_client *clp = ses->se_client;
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+ lockdep_assert_held(&nn->client_lock);
+
list_del(&ses->se_hash);
spin_lock(&ses->se_client->cl_lock);
list_del(&ses->se_perclnt);
@@ -1169,15 +1519,20 @@ STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn)
static struct nfs4_client *alloc_client(struct xdr_netobj name)
{
struct nfs4_client *clp;
+ int i;
clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL);
if (clp == NULL)
return NULL;
clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL);
- if (clp->cl_name.data == NULL) {
- kfree(clp);
- return NULL;
- }
+ if (clp->cl_name.data == NULL)
+ goto err_no_name;
+ clp->cl_ownerstr_hashtbl = kmalloc(sizeof(struct list_head) *
+ OWNER_HASH_SIZE, GFP_KERNEL);
+ if (!clp->cl_ownerstr_hashtbl)
+ goto err_no_hashtbl;
+ for (i = 0; i < OWNER_HASH_SIZE; i++)
+ INIT_LIST_HEAD(&clp->cl_ownerstr_hashtbl[i]);
clp->cl_name.len = name.len;
INIT_LIST_HEAD(&clp->cl_sessions);
idr_init(&clp->cl_stateids);
@@ -1192,14 +1547,16 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
spin_lock_init(&clp->cl_lock);
rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
return clp;
+err_no_hashtbl:
+ kfree(clp->cl_name.data);
+err_no_name:
+ kfree(clp);
+ return NULL;
}
static void
free_client(struct nfs4_client *clp)
{
- struct nfsd_net __maybe_unused *nn = net_generic(clp->net, nfsd_net_id);
-
- lockdep_assert_held(&nn->client_lock);
while (!list_empty(&clp->cl_sessions)) {
struct nfsd4_session *ses;
ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
@@ -1210,18 +1567,32 @@ free_client(struct nfs4_client *clp)
}
rpc_destroy_wait_queue(&clp->cl_cb_waitq);
free_svc_cred(&clp->cl_cred);
+ kfree(clp->cl_ownerstr_hashtbl);
kfree(clp->cl_name.data);
idr_destroy(&clp->cl_stateids);
kfree(clp);
}
/* must be called under the client_lock */
-static inline void
+static void
unhash_client_locked(struct nfs4_client *clp)
{
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
struct nfsd4_session *ses;
- list_del(&clp->cl_lru);
+ lockdep_assert_held(&nn->client_lock);
+
+ /* Mark the client as expired! */
+ clp->cl_time = 0;
+ /* Make it invisible */
+ if (!list_empty(&clp->cl_idhash)) {
+ list_del_init(&clp->cl_idhash);
+ if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags))
+ rb_erase(&clp->cl_namenode, &nn->conf_name_tree);
+ else
+ rb_erase(&clp->cl_namenode, &nn->unconf_name_tree);
+ }
+ list_del_init(&clp->cl_lru);
spin_lock(&clp->cl_lock);
list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
list_del_init(&ses->se_hash);
@@ -1229,53 +1600,71 @@ unhash_client_locked(struct nfs4_client *clp)
}
static void
-destroy_client(struct nfs4_client *clp)
+unhash_client(struct nfs4_client *clp)
+{
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+ spin_lock(&nn->client_lock);
+ unhash_client_locked(clp);
+ spin_unlock(&nn->client_lock);
+}
+
+static __be32 mark_client_expired_locked(struct nfs4_client *clp)
+{
+ if (atomic_read(&clp->cl_refcount))
+ return nfserr_jukebox;
+ unhash_client_locked(clp);
+ return nfs_ok;
+}
+
+static void
+__destroy_client(struct nfs4_client *clp)
{
struct nfs4_openowner *oo;
struct nfs4_delegation *dp;
struct list_head reaplist;
- struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
INIT_LIST_HEAD(&reaplist);
spin_lock(&state_lock);
while (!list_empty(&clp->cl_delegations)) {
dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
- list_del_init(&dp->dl_perclnt);
- list_move(&dp->dl_recall_lru, &reaplist);
+ unhash_delegation_locked(dp);
+ list_add(&dp->dl_recall_lru, &reaplist);
}
spin_unlock(&state_lock);
while (!list_empty(&reaplist)) {
dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
- destroy_delegation(dp);
+ list_del_init(&dp->dl_recall_lru);
+ nfs4_put_stid(&dp->dl_stid);
}
- list_splice_init(&clp->cl_revoked, &reaplist);
- while (!list_empty(&reaplist)) {
+ while (!list_empty(&clp->cl_revoked)) {
dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
- destroy_revoked_delegation(dp);
+ list_del_init(&dp->dl_recall_lru);
+ nfs4_put_stid(&dp->dl_stid);
}
while (!list_empty(&clp->cl_openowners)) {
oo = list_entry(clp->cl_openowners.next, struct nfs4_openowner, oo_perclient);
+ atomic_inc(&oo->oo_owner.so_count);
release_openowner(oo);
}
nfsd4_shutdown_callback(clp);
if (clp->cl_cb_conn.cb_xprt)
svc_xprt_put(clp->cl_cb_conn.cb_xprt);
- list_del(&clp->cl_idhash);
- if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags))
- rb_erase(&clp->cl_namenode, &nn->conf_name_tree);
- else
- rb_erase(&clp->cl_namenode, &nn->unconf_name_tree);
- spin_lock(&nn->client_lock);
- unhash_client_locked(clp);
- WARN_ON_ONCE(atomic_read(&clp->cl_refcount));
free_client(clp);
- spin_unlock(&nn->client_lock);
+}
+
+static void
+destroy_client(struct nfs4_client *clp)
+{
+ unhash_client(clp);
+ __destroy_client(clp);
}
static void expire_client(struct nfs4_client *clp)
{
+ unhash_client(clp);
nfsd4_client_record_remove(clp);
- destroy_client(clp);
+ __destroy_client(clp);
}
static void copy_verf(struct nfs4_client *target, nfs4_verifier *source)
@@ -1408,25 +1797,28 @@ static bool mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp)
return 0 == strcmp(cl->cl_cred.cr_principal, cr->cr_principal);
}
-static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn)
+static void gen_confirm(struct nfs4_client *clp, struct nfsd_net *nn)
{
- static u32 current_clientid = 1;
+ __be32 verf[2];
- clp->cl_clientid.cl_boot = nn->boot_time;
- clp->cl_clientid.cl_id = current_clientid++;
+ /*
+ * This is opaque to client, so no need to byte-swap. Use
+ * __force to keep sparse happy
+ */
+ verf[0] = (__force __be32)get_seconds();
+ verf[1] = (__force __be32)nn->clientid_counter;
+ memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data));
}
-static void gen_confirm(struct nfs4_client *clp)
+static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn)
{
- __be32 verf[2];
- static u32 i;
-
- verf[0] = (__be32)get_seconds();
- verf[1] = (__be32)i++;
- memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data));
+ clp->cl_clientid.cl_boot = nn->boot_time;
+ clp->cl_clientid.cl_id = nn->clientid_counter++;
+ gen_confirm(clp, nn);
}
-static struct nfs4_stid *find_stateid(struct nfs4_client *cl, stateid_t *t)
+static struct nfs4_stid *
+find_stateid_locked(struct nfs4_client *cl, stateid_t *t)
{
struct nfs4_stid *ret;
@@ -1436,16 +1828,21 @@ static struct nfs4_stid *find_stateid(struct nfs4_client *cl, stateid_t *t)
return ret;
}
-static struct nfs4_stid *find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
+static struct nfs4_stid *
+find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
{
struct nfs4_stid *s;
- s = find_stateid(cl, t);
- if (!s)
- return NULL;
- if (typemask & s->sc_type)
- return s;
- return NULL;
+ spin_lock(&cl->cl_lock);
+ s = find_stateid_locked(cl, t);
+ if (s != NULL) {
+ if (typemask & s->sc_type)
+ atomic_inc(&s->sc_count);
+ else
+ s = NULL;
+ }
+ spin_unlock(&cl->cl_lock);
+ return s;
}
static struct nfs4_client *create_client(struct xdr_netobj name,
@@ -1455,7 +1852,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
struct sockaddr *sa = svc_addr(rqstp);
int ret;
struct net *net = SVC_NET(rqstp);
- struct nfsd_net *nn = net_generic(net, nfsd_net_id);
clp = alloc_client(name);
if (clp == NULL)
@@ -1463,17 +1859,14 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
ret = copy_cred(&clp->cl_cred, &rqstp->rq_cred);
if (ret) {
- spin_lock(&nn->client_lock);
free_client(clp);
- spin_unlock(&nn->client_lock);
return NULL;
}
- nfsd4_init_callback(&clp->cl_cb_null);
+ INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_run_cb_null);
clp->cl_time = get_seconds();
clear_bit(0, &clp->cl_cb_slot_busy);
copy_verf(clp, verf);
rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa);
- gen_confirm(clp);
clp->cl_cb_session = NULL;
clp->net = net;
return clp;
@@ -1525,11 +1918,13 @@ add_to_unconfirmed(struct nfs4_client *clp)
unsigned int idhashval;
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ lockdep_assert_held(&nn->client_lock);
+
clear_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags);
add_clp_to_name_tree(clp, &nn->unconf_name_tree);
idhashval = clientid_hashval(clp->cl_clientid.cl_id);
list_add(&clp->cl_idhash, &nn->unconf_id_hashtbl[idhashval]);
- renew_client(clp);
+ renew_client_locked(clp);
}
static void
@@ -1538,12 +1933,14 @@ move_to_confirmed(struct nfs4_client *clp)
unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id);
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ lockdep_assert_held(&nn->client_lock);
+
dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp);
list_move(&clp->cl_idhash, &nn->conf_id_hashtbl[idhashval]);
rb_erase(&clp->cl_namenode, &nn->unconf_name_tree);
add_clp_to_name_tree(clp, &nn->conf_name_tree);
set_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags);
- renew_client(clp);
+ renew_client_locked(clp);
}
static struct nfs4_client *
@@ -1556,7 +1953,7 @@ find_client_in_id_table(struct list_head *tbl, clientid_t *clid, bool sessions)
if (same_clid(&clp->cl_clientid, clid)) {
if ((bool)clp->cl_minorversion != sessions)
return NULL;
- renew_client(clp);
+ renew_client_locked(clp);
return clp;
}
}
@@ -1568,6 +1965,7 @@ find_confirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn)
{
struct list_head *tbl = nn->conf_id_hashtbl;
+ lockdep_assert_held(&nn->client_lock);
return find_client_in_id_table(tbl, clid, sessions);
}
@@ -1576,6 +1974,7 @@ find_unconfirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn)
{
struct list_head *tbl = nn->unconf_id_hashtbl;
+ lockdep_assert_held(&nn->client_lock);
return find_client_in_id_table(tbl, clid, sessions);
}
@@ -1587,12 +1986,14 @@ static bool clp_used_exchangeid(struct nfs4_client *clp)
static struct nfs4_client *
find_confirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn)
{
+ lockdep_assert_held(&nn->client_lock);
return find_clp_in_name_tree(name, &nn->conf_name_tree);
}
static struct nfs4_client *
find_unconfirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn)
{
+ lockdep_assert_held(&nn->client_lock);
return find_clp_in_name_tree(name, &nn->unconf_name_tree);
}
@@ -1642,7 +2043,7 @@ out_err:
/*
* Cache a reply. nfsd4_check_resp_size() has bounded the cache size.
*/
-void
+static void
nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
{
struct xdr_buf *buf = resp->xdr.buf;
@@ -1758,7 +2159,8 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate,
struct nfsd4_exchange_id *exid)
{
- struct nfs4_client *unconf, *conf, *new;
+ struct nfs4_client *conf, *new;
+ struct nfs4_client *unconf = NULL;
__be32 status;
char addr_str[INET6_ADDRSTRLEN];
nfs4_verifier verf = exid->verifier;
@@ -1787,8 +2189,12 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
return nfserr_encr_alg_unsupp;
}
+ new = create_client(exid->clname, rqstp, &verf);
+ if (new == NULL)
+ return nfserr_jukebox;
+
/* Cases below refer to rfc 5661 section 18.35.4: */
- nfs4_lock_state();
+ spin_lock(&nn->client_lock);
conf = find_confirmed_client_by_name(&exid->clname, nn);
if (conf) {
bool creds_match = same_creds(&conf->cl_cred, &rqstp->rq_cred);
@@ -1813,7 +2219,6 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
}
/* case 6 */
exid->flags |= EXCHGID4_FLAG_CONFIRMED_R;
- new = conf;
goto out_copy;
}
if (!creds_match) { /* case 3 */
@@ -1821,15 +2226,14 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
status = nfserr_clid_inuse;
goto out;
}
- expire_client(conf);
goto out_new;
}
if (verfs_match) { /* case 2 */
conf->cl_exchange_flags |= EXCHGID4_FLAG_CONFIRMED_R;
- new = conf;
goto out_copy;
}
/* case 5, client reboot */
+ conf = NULL;
goto out_new;
}
@@ -1840,33 +2244,38 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
unconf = find_unconfirmed_client_by_name(&exid->clname, nn);
if (unconf) /* case 4, possible retry or client restart */
- expire_client(unconf);
+ unhash_client_locked(unconf);
/* case 1 (normal case) */
out_new:
- new = create_client(exid->clname, rqstp, &verf);
- if (new == NULL) {
- status = nfserr_jukebox;
- goto out;
+ if (conf) {
+ status = mark_client_expired_locked(conf);
+ if (status)
+ goto out;
}
new->cl_minorversion = cstate->minorversion;
new->cl_mach_cred = (exid->spa_how == SP4_MACH_CRED);
gen_clid(new, nn);
add_to_unconfirmed(new);
+ swap(new, conf);
out_copy:
- exid->clientid.cl_boot = new->cl_clientid.cl_boot;
- exid->clientid.cl_id = new->cl_clientid.cl_id;
+ exid->clientid.cl_boot = conf->cl_clientid.cl_boot;
+ exid->clientid.cl_id = conf->cl_clientid.cl_id;
- exid->seqid = new->cl_cs_slot.sl_seqid + 1;
- nfsd4_set_ex_flags(new, exid);
+ exid->seqid = conf->cl_cs_slot.sl_seqid + 1;
+ nfsd4_set_ex_flags(conf, exid);
dprintk("nfsd4_exchange_id seqid %d flags %x\n",
- new->cl_cs_slot.sl_seqid, new->cl_exchange_flags);
+ conf->cl_cs_slot.sl_seqid, conf->cl_exchange_flags);
status = nfs_ok;
out:
- nfs4_unlock_state();
+ spin_unlock(&nn->client_lock);
+ if (new)
+ expire_client(new);
+ if (unconf)
+ expire_client(unconf);
return status;
}
@@ -2010,6 +2419,7 @@ nfsd4_create_session(struct svc_rqst *rqstp,
{
struct sockaddr *sa = svc_addr(rqstp);
struct nfs4_client *conf, *unconf;
+ struct nfs4_client *old = NULL;
struct nfsd4_session *new;
struct nfsd4_conn *conn;
struct nfsd4_clid_slot *cs_slot = NULL;
@@ -2035,7 +2445,7 @@ nfsd4_create_session(struct svc_rqst *rqstp,
if (!conn)
goto out_free_session;
- nfs4_lock_state();
+ spin_lock(&nn->client_lock);
unconf = find_unconfirmed_client(&cr_ses->clientid, true, nn);
conf = find_confirmed_client(&cr_ses->clientid, true, nn);
WARN_ON_ONCE(conf && unconf);
@@ -2054,7 +2464,6 @@ nfsd4_create_session(struct svc_rqst *rqstp,
goto out_free_conn;
}
} else if (unconf) {
- struct nfs4_client *old;
if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) ||
!rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) {
status = nfserr_clid_inuse;
@@ -2072,10 +2481,11 @@ nfsd4_create_session(struct svc_rqst *rqstp,
}
old = find_confirmed_client_by_name(&unconf->cl_name, nn);
if (old) {
- status = mark_client_expired(old);
- if (status)
+ status = mark_client_expired_locked(old);
+ if (status) {
+ old = NULL;
goto out_free_conn;
- expire_client(old);
+ }
}
move_to_confirmed(unconf);
conf = unconf;
@@ -2091,20 +2501,27 @@ nfsd4_create_session(struct svc_rqst *rqstp,
cr_ses->flags &= ~SESSION4_RDMA;
init_session(rqstp, new, conf, cr_ses);
- nfsd4_init_conn(rqstp, conn, new);
+ nfsd4_get_session_locked(new);
memcpy(cr_ses->sessionid.data, new->se_sessionid.data,
NFS4_MAX_SESSIONID_LEN);
cs_slot->sl_seqid++;
cr_ses->seqid = cs_slot->sl_seqid;
- /* cache solo and embedded create sessions under the state lock */
+ /* cache solo and embedded create sessions under the client_lock */
nfsd4_cache_create_session(cr_ses, cs_slot, status);
- nfs4_unlock_state();
+ spin_unlock(&nn->client_lock);
+ /* init connection and backchannel */
+ nfsd4_init_conn(rqstp, conn, new);
+ nfsd4_put_session(new);
+ if (old)
+ expire_client(old);
return status;
out_free_conn:
- nfs4_unlock_state();
+ spin_unlock(&nn->client_lock);
free_conn(conn);
+ if (old)
+ expire_client(old);
out_free_session:
__free_session(new);
out_release_drc_mem:
@@ -2152,17 +2569,16 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp,
__be32 status;
struct nfsd4_conn *conn;
struct nfsd4_session *session;
- struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ struct net *net = SVC_NET(rqstp);
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
if (!nfsd4_last_compound_op(rqstp))
return nfserr_not_only_op;
- nfs4_lock_state();
spin_lock(&nn->client_lock);
- session = find_in_sessionid_hashtbl(&bcts->sessionid, SVC_NET(rqstp));
+ session = find_in_sessionid_hashtbl(&bcts->sessionid, net, &status);
spin_unlock(&nn->client_lock);
- status = nfserr_badsession;
if (!session)
- goto out;
+ goto out_no_session;
status = nfserr_wrong_cred;
if (!mach_creds_match(session->se_client, rqstp))
goto out;
@@ -2176,7 +2592,8 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp,
nfsd4_init_conn(rqstp, conn, session);
status = nfs_ok;
out:
- nfs4_unlock_state();
+ nfsd4_put_session(session);
+out_no_session:
return status;
}
@@ -2195,9 +2612,9 @@ nfsd4_destroy_session(struct svc_rqst *r,
struct nfsd4_session *ses;
__be32 status;
int ref_held_by_me = 0;
- struct nfsd_net *nn = net_generic(SVC_NET(r), nfsd_net_id);
+ struct net *net = SVC_NET(r);
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- nfs4_lock_state();
status = nfserr_not_only_op;
if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) {
if (!nfsd4_last_compound_op(r))
@@ -2206,14 +2623,12 @@ nfsd4_destroy_session(struct svc_rqst *r,
}
dump_sessionid(__func__, &sessionid->sessionid);
spin_lock(&nn->client_lock);
- ses = find_in_sessionid_hashtbl(&sessionid->sessionid, SVC_NET(r));
- status = nfserr_badsession;
+ ses = find_in_sessionid_hashtbl(&sessionid->sessionid, net, &status);
if (!ses)
goto out_client_lock;
status = nfserr_wrong_cred;
if (!mach_creds_match(ses->se_client, r))
- goto out_client_lock;
- nfsd4_get_session_locked(ses);
+ goto out_put_session;
status = mark_session_dead_locked(ses, 1 + ref_held_by_me);
if (status)
goto out_put_session;
@@ -2225,11 +2640,10 @@ nfsd4_destroy_session(struct svc_rqst *r,
spin_lock(&nn->client_lock);
status = nfs_ok;
out_put_session:
- nfsd4_put_session(ses);
+ nfsd4_put_session_locked(ses);
out_client_lock:
spin_unlock(&nn->client_lock);
out:
- nfs4_unlock_state();
return status;
}
@@ -2300,7 +2714,8 @@ nfsd4_sequence(struct svc_rqst *rqstp,
struct nfsd4_conn *conn;
__be32 status;
int buflen;
- struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ struct net *net = SVC_NET(rqstp);
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
if (resp->opcnt != 1)
return nfserr_sequence_pos;
@@ -2314,17 +2729,10 @@ nfsd4_sequence(struct svc_rqst *rqstp,
return nfserr_jukebox;
spin_lock(&nn->client_lock);
- status = nfserr_badsession;
- session = find_in_sessionid_hashtbl(&seq->sessionid, SVC_NET(rqstp));
+ session = find_in_sessionid_hashtbl(&seq->sessionid, net, &status);
if (!session)
goto out_no_session;
clp = session->se_client;
- status = get_client_locked(clp);
- if (status)
- goto out_no_session;
- status = nfsd4_get_session_locked(session);
- if (status)
- goto out_put_client;
status = nfserr_too_many_ops;
if (nfsd4_session_too_many_ops(rqstp, session))
@@ -2354,6 +2762,7 @@ nfsd4_sequence(struct svc_rqst *rqstp,
goto out_put_session;
cstate->slot = slot;
cstate->session = session;
+ cstate->clp = clp;
/* Return the cached reply status and set cstate->status
* for nfsd4_proc_compound processing */
status = nfsd4_replay_cache_entry(resp, seq);
@@ -2388,6 +2797,7 @@ nfsd4_sequence(struct svc_rqst *rqstp,
cstate->slot = slot;
cstate->session = session;
+ cstate->clp = clp;
out:
switch (clp->cl_cb_state) {
@@ -2408,31 +2818,48 @@ out_no_session:
spin_unlock(&nn->client_lock);
return status;
out_put_session:
- nfsd4_put_session(session);
-out_put_client:
- put_client_renew_locked(clp);
+ nfsd4_put_session_locked(session);
goto out_no_session;
}
+void
+nfsd4_sequence_done(struct nfsd4_compoundres *resp)
+{
+ struct nfsd4_compound_state *cs = &resp->cstate;
+
+ if (nfsd4_has_session(cs)) {
+ if (cs->status != nfserr_replay_cache) {
+ nfsd4_store_cache_entry(resp);
+ cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE;
+ }
+ /* Drop session reference that was taken in nfsd4_sequence() */
+ nfsd4_put_session(cs->session);
+ } else if (cs->clp)
+ put_client_renew(cs->clp);
+}
+
__be32
nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_destroy_clientid *dc)
{
- struct nfs4_client *conf, *unconf, *clp;
+ struct nfs4_client *conf, *unconf;
+ struct nfs4_client *clp = NULL;
__be32 status = 0;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
- nfs4_lock_state();
+ spin_lock(&nn->client_lock);
unconf = find_unconfirmed_client(&dc->clientid, true, nn);
conf = find_confirmed_client(&dc->clientid, true, nn);
WARN_ON_ONCE(conf && unconf);
if (conf) {
- clp = conf;
-
if (client_has_state(conf)) {
status = nfserr_clientid_busy;
goto out;
}
+ status = mark_client_expired_locked(conf);
+ if (status)
+ goto out;
+ clp = conf;
} else if (unconf)
clp = unconf;
else {
@@ -2440,12 +2867,15 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta
goto out;
}
if (!mach_creds_match(clp, rqstp)) {
+ clp = NULL;
status = nfserr_wrong_cred;
goto out;
}
- expire_client(clp);
+ unhash_client_locked(clp);
out:
- nfs4_unlock_state();
+ spin_unlock(&nn->client_lock);
+ if (clp)
+ expire_client(clp);
return status;
}
@@ -2464,7 +2894,6 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta
return nfs_ok;
}
- nfs4_lock_state();
status = nfserr_complete_already;
if (test_and_set_bit(NFSD4_CLIENT_RECLAIM_COMPLETE,
&cstate->session->se_client->cl_flags))
@@ -2484,7 +2913,6 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta
status = nfs_ok;
nfsd4_client_record_create(cstate->session->se_client);
out:
- nfs4_unlock_state();
return status;
}
@@ -2494,12 +2922,16 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
{
struct xdr_netobj clname = setclid->se_name;
nfs4_verifier clverifier = setclid->se_verf;
- struct nfs4_client *conf, *unconf, *new;
+ struct nfs4_client *conf, *new;
+ struct nfs4_client *unconf = NULL;
__be32 status;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ new = create_client(clname, rqstp, &clverifier);
+ if (new == NULL)
+ return nfserr_jukebox;
/* Cases below refer to rfc 3530 section 14.2.33: */
- nfs4_lock_state();
+ spin_lock(&nn->client_lock);
conf = find_confirmed_client_by_name(&clname, nn);
if (conf) {
/* case 0: */
@@ -2517,11 +2949,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
}
unconf = find_unconfirmed_client_by_name(&clname, nn);
if (unconf)
- expire_client(unconf);
- status = nfserr_jukebox;
- new = create_client(clname, rqstp, &clverifier);
- if (new == NULL)
- goto out;
+ unhash_client_locked(unconf);
if (conf && same_verf(&conf->cl_verifier, &clverifier))
/* case 1: probable callback update */
copy_clid(new, conf);
@@ -2533,9 +2961,14 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot;
setclid->se_clientid.cl_id = new->cl_clientid.cl_id;
memcpy(setclid->se_confirm.data, new->cl_confirm.data, sizeof(setclid->se_confirm.data));
+ new = NULL;
status = nfs_ok;
out:
- nfs4_unlock_state();
+ spin_unlock(&nn->client_lock);
+ if (new)
+ free_client(new);
+ if (unconf)
+ expire_client(unconf);
return status;
}
@@ -2546,6 +2979,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
struct nfsd4_setclientid_confirm *setclientid_confirm)
{
struct nfs4_client *conf, *unconf;
+ struct nfs4_client *old = NULL;
nfs4_verifier confirm = setclientid_confirm->sc_confirm;
clientid_t * clid = &setclientid_confirm->sc_clientid;
__be32 status;
@@ -2553,8 +2987,8 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
if (STALE_CLIENTID(clid, nn))
return nfserr_stale_clientid;
- nfs4_lock_state();
+ spin_lock(&nn->client_lock);
conf = find_confirmed_client(clid, false, nn);
unconf = find_unconfirmed_client(clid, false, nn);
/*
@@ -2578,22 +3012,30 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
}
status = nfs_ok;
if (conf) { /* case 1: callback update */
+ old = unconf;
+ unhash_client_locked(old);
nfsd4_change_callback(conf, &unconf->cl_cb_conn);
- nfsd4_probe_callback(conf);
- expire_client(unconf);
} else { /* case 3: normal case; new or rebooted client */
- conf = find_confirmed_client_by_name(&unconf->cl_name, nn);
- if (conf) {
- status = mark_client_expired(conf);
- if (status)
+ old = find_confirmed_client_by_name(&unconf->cl_name, nn);
+ if (old) {
+ status = mark_client_expired_locked(old);
+ if (status) {
+ old = NULL;
goto out;
- expire_client(conf);
+ }
}
move_to_confirmed(unconf);
- nfsd4_probe_callback(unconf);
+ conf = unconf;
}
+ get_client_locked(conf);
+ spin_unlock(&nn->client_lock);
+ nfsd4_probe_callback(conf);
+ spin_lock(&nn->client_lock);
+ put_client_renew_locked(conf);
out:
- nfs4_unlock_state();
+ spin_unlock(&nn->client_lock);
+ if (old)
+ expire_client(old);
return status;
}
@@ -2603,21 +3045,23 @@ static struct nfs4_file *nfsd4_alloc_file(void)
}
/* OPEN Share state helper functions */
-static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino)
+static void nfsd4_init_file(struct nfs4_file *fp, struct knfsd_fh *fh)
{
- unsigned int hashval = file_hashval(ino);
+ unsigned int hashval = file_hashval(fh);
+
+ lockdep_assert_held(&state_lock);
atomic_set(&fp->fi_ref, 1);
+ spin_lock_init(&fp->fi_lock);
INIT_LIST_HEAD(&fp->fi_stateids);
INIT_LIST_HEAD(&fp->fi_delegations);
- fp->fi_inode = igrab(ino);
+ fh_copy_shallow(&fp->fi_fhandle, fh);
fp->fi_had_conflict = false;
fp->fi_lease = NULL;
+ fp->fi_share_deny = 0;
memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
memset(fp->fi_access, 0, sizeof(fp->fi_access));
- spin_lock(&state_lock);
hlist_add_head(&fp->fi_hash, &file_hashtbl[hashval]);
- spin_unlock(&state_lock);
}
void
@@ -2673,6 +3117,28 @@ static void init_nfs4_replay(struct nfs4_replay *rp)
rp->rp_status = nfserr_serverfault;
rp->rp_buflen = 0;
rp->rp_buf = rp->rp_ibuf;
+ mutex_init(&rp->rp_mutex);
+}
+
+static void nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate,
+ struct nfs4_stateowner *so)
+{
+ if (!nfsd4_has_session(cstate)) {
+ mutex_lock(&so->so_replay.rp_mutex);
+ cstate->replay_owner = so;
+ atomic_inc(&so->so_count);
+ }
+}
+
+void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate)
+{
+ struct nfs4_stateowner *so = cstate->replay_owner;
+
+ if (so != NULL) {
+ cstate->replay_owner = NULL;
+ mutex_unlock(&so->so_replay.rp_mutex);
+ nfs4_put_stateowner(so);
+ }
}
static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj *owner, struct nfs4_client *clp)
@@ -2693,111 +3159,172 @@ static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj
INIT_LIST_HEAD(&sop->so_stateids);
sop->so_client = clp;
init_nfs4_replay(&sop->so_replay);
+ atomic_set(&sop->so_count, 1);
return sop;
}
static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, unsigned int strhashval)
{
- struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ lockdep_assert_held(&clp->cl_lock);
- list_add(&oo->oo_owner.so_strhash, &nn->ownerstr_hashtbl[strhashval]);
+ list_add(&oo->oo_owner.so_strhash,
+ &clp->cl_ownerstr_hashtbl[strhashval]);
list_add(&oo->oo_perclient, &clp->cl_openowners);
}
+static void nfs4_unhash_openowner(struct nfs4_stateowner *so)
+{
+ unhash_openowner_locked(openowner(so));
+}
+
+static void nfs4_free_openowner(struct nfs4_stateowner *so)
+{
+ struct nfs4_openowner *oo = openowner(so);
+
+ kmem_cache_free(openowner_slab, oo);
+}
+
+static const struct nfs4_stateowner_operations openowner_ops = {
+ .so_unhash = nfs4_unhash_openowner,
+ .so_free = nfs4_free_openowner,
+};
+
static struct nfs4_openowner *
-alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfsd4_open *open) {
- struct nfs4_openowner *oo;
+alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
+ struct nfsd4_compound_state *cstate)
+{
+ struct nfs4_client *clp = cstate->clp;
+ struct nfs4_openowner *oo, *ret;
oo = alloc_stateowner(openowner_slab, &open->op_owner, clp);
if (!oo)
return NULL;
+ oo->oo_owner.so_ops = &openowner_ops;
oo->oo_owner.so_is_open_owner = 1;
oo->oo_owner.so_seqid = open->op_seqid;
- oo->oo_flags = NFS4_OO_NEW;
+ oo->oo_flags = 0;
+ if (nfsd4_has_session(cstate))
+ oo->oo_flags |= NFS4_OO_CONFIRMED;
oo->oo_time = 0;
oo->oo_last_closed_stid = NULL;
INIT_LIST_HEAD(&oo->oo_close_lru);
- hash_openowner(oo, clp, strhashval);
+ spin_lock(&clp->cl_lock);
+ ret = find_openstateowner_str_locked(strhashval, open, clp);
+ if (ret == NULL) {
+ hash_openowner(oo, clp, strhashval);
+ ret = oo;
+ } else
+ nfs4_free_openowner(&oo->oo_owner);
+ spin_unlock(&clp->cl_lock);
return oo;
}
static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) {
struct nfs4_openowner *oo = open->op_openowner;
+ atomic_inc(&stp->st_stid.sc_count);
stp->st_stid.sc_type = NFS4_OPEN_STID;
- INIT_LIST_HEAD(&stp->st_lockowners);
- list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
- list_add(&stp->st_perfile, &fp->fi_stateids);
+ INIT_LIST_HEAD(&stp->st_locks);
stp->st_stateowner = &oo->oo_owner;
+ atomic_inc(&stp->st_stateowner->so_count);
get_nfs4_file(fp);
- stp->st_file = fp;
+ stp->st_stid.sc_file = fp;
stp->st_access_bmap = 0;
stp->st_deny_bmap = 0;
- set_access(open->op_share_access, stp);
- set_deny(open->op_share_deny, stp);
stp->st_openstp = NULL;
+ spin_lock(&oo->oo_owner.so_client->cl_lock);
+ list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
+ spin_lock(&fp->fi_lock);
+ list_add(&stp->st_perfile, &fp->fi_stateids);
+ spin_unlock(&fp->fi_lock);
+ spin_unlock(&oo->oo_owner.so_client->cl_lock);
}
+/*
+ * In the 4.0 case we need to keep the owners around a little while to handle
+ * CLOSE replay. We still do need to release any file access that is held by
+ * them before returning however.
+ */
static void
-move_to_close_lru(struct nfs4_openowner *oo, struct net *net)
+move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
{
- struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct nfs4_ol_stateid *last;
+ struct nfs4_openowner *oo = openowner(s->st_stateowner);
+ struct nfsd_net *nn = net_generic(s->st_stid.sc_client->net,
+ nfsd_net_id);
dprintk("NFSD: move_to_close_lru nfs4_openowner %p\n", oo);
+ /*
+ * We know that we hold one reference via nfsd4_close, and another
+ * "persistent" reference for the client. If the refcount is higher
+ * than 2, then there are still calls in progress that are using this
+ * stateid. We can't put the sc_file reference until they are finished.
+ * Wait for the refcount to drop to 2. Since it has been unhashed,
+ * there should be no danger of the refcount going back up again at
+ * this point.
+ */
+ wait_event(close_wq, atomic_read(&s->st_stid.sc_count) == 2);
+
+ release_all_access(s);
+ if (s->st_stid.sc_file) {
+ put_nfs4_file(s->st_stid.sc_file);
+ s->st_stid.sc_file = NULL;
+ }
+
+ spin_lock(&nn->client_lock);
+ last = oo->oo_last_closed_stid;
+ oo->oo_last_closed_stid = s;
list_move_tail(&oo->oo_close_lru, &nn->close_lru);
oo->oo_time = get_seconds();
+ spin_unlock(&nn->client_lock);
+ if (last)
+ nfs4_put_stid(&last->st_stid);
}
-static int
-same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner,
- clientid_t *clid)
+/* search file_hashtbl[] for file */
+static struct nfs4_file *
+find_file_locked(struct knfsd_fh *fh)
{
- return (sop->so_owner.len == owner->len) &&
- 0 == memcmp(sop->so_owner.data, owner->data, owner->len) &&
- (sop->so_client->cl_clientid.cl_id == clid->cl_id);
-}
+ unsigned int hashval = file_hashval(fh);
+ struct nfs4_file *fp;
-static struct nfs4_openowner *
-find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open,
- bool sessions, struct nfsd_net *nn)
-{
- struct nfs4_stateowner *so;
- struct nfs4_openowner *oo;
- struct nfs4_client *clp;
+ lockdep_assert_held(&state_lock);
- list_for_each_entry(so, &nn->ownerstr_hashtbl[hashval], so_strhash) {
- if (!so->so_is_open_owner)
- continue;
- if (same_owner_str(so, &open->op_owner, &open->op_clientid)) {
- oo = openowner(so);
- clp = oo->oo_owner.so_client;
- if ((bool)clp->cl_minorversion != sessions)
- return NULL;
- renew_client(oo->oo_owner.so_client);
- return oo;
+ hlist_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) {
+ if (nfsd_fh_match(&fp->fi_fhandle, fh)) {
+ get_nfs4_file(fp);
+ return fp;
}
}
return NULL;
}
-/* search file_hashtbl[] for file */
static struct nfs4_file *
-find_file(struct inode *ino)
+find_file(struct knfsd_fh *fh)
{
- unsigned int hashval = file_hashval(ino);
struct nfs4_file *fp;
spin_lock(&state_lock);
- hlist_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) {
- if (fp->fi_inode == ino) {
- get_nfs4_file(fp);
- spin_unlock(&state_lock);
- return fp;
- }
+ fp = find_file_locked(fh);
+ spin_unlock(&state_lock);
+ return fp;
+}
+
+static struct nfs4_file *
+find_or_add_file(struct nfs4_file *new, struct knfsd_fh *fh)
+{
+ struct nfs4_file *fp;
+
+ spin_lock(&state_lock);
+ fp = find_file_locked(fh);
+ if (fp == NULL) {
+ nfsd4_init_file(new, fh);
+ fp = new;
}
spin_unlock(&state_lock);
- return NULL;
+
+ return fp;
}
/*
@@ -2807,47 +3334,53 @@ find_file(struct inode *ino)
static __be32
nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
{
- struct inode *ino = current_fh->fh_dentry->d_inode;
struct nfs4_file *fp;
- struct nfs4_ol_stateid *stp;
- __be32 ret;
+ __be32 ret = nfs_ok;
- fp = find_file(ino);
+ fp = find_file(&current_fh->fh_handle);
if (!fp)
- return nfs_ok;
- ret = nfserr_locked;
- /* Search for conflicting share reservations */
- list_for_each_entry(stp, &fp->fi_stateids, st_perfile) {
- if (test_deny(deny_type, stp) ||
- test_deny(NFS4_SHARE_DENY_BOTH, stp))
- goto out;
- }
- ret = nfs_ok;
-out:
+ return ret;
+ /* Check for conflicting share reservations */
+ spin_lock(&fp->fi_lock);
+ if (fp->fi_share_deny & deny_type)
+ ret = nfserr_locked;
+ spin_unlock(&fp->fi_lock);
put_nfs4_file(fp);
return ret;
}
-static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
+void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp)
{
- struct nfs4_client *clp = dp->dl_stid.sc_client;
- struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ struct nfsd_net *nn = net_generic(dp->dl_stid.sc_client->net,
+ nfsd_net_id);
- lockdep_assert_held(&state_lock);
- /* We're assuming the state code never drops its reference
+ block_delegations(&dp->dl_stid.sc_file->fi_fhandle);
+
+ /*
+ * We can't do this in nfsd_break_deleg_cb because it is
+ * already holding inode->i_lock.
+ *
+ * If the dl_time != 0, then we know that it has already been
+ * queued for a lease break. Don't queue it again.
+ */
+ spin_lock(&state_lock);
+ if (dp->dl_time == 0) {
+ dp->dl_time = get_seconds();
+ list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru);
+ }
+ spin_unlock(&state_lock);
+}
+
+static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
+{
+ /*
+ * We're assuming the state code never drops its reference
* without first removing the lease. Since we're in this lease
* callback (and since the lease code is serialized by the kernel
* lock) we know the server hasn't removed the lease yet, we know
- * it's safe to take a reference: */
- atomic_inc(&dp->dl_count);
-
- list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru);
-
- /* Only place dl_time is set; protected by i_lock: */
- dp->dl_time = get_seconds();
-
- block_delegations(&dp->dl_fh);
-
+ * it's safe to take a reference.
+ */
+ atomic_inc(&dp->dl_stid.sc_count);
nfsd4_cb_recall(dp);
}
@@ -2872,11 +3405,20 @@ static void nfsd_break_deleg_cb(struct file_lock *fl)
*/
fl->fl_break_time = 0;
- spin_lock(&state_lock);
+ spin_lock(&fp->fi_lock);
fp->fi_had_conflict = true;
- list_for_each_entry(dp, &fp->fi_delegations, dl_perfile)
- nfsd_break_one_deleg(dp);
- spin_unlock(&state_lock);
+ /*
+ * If there are no delegations on the list, then we can't count on this
+ * lease ever being cleaned up. Set the fl_break_time to jiffies so that
+ * time_out_leases will do it ASAP. The fact that fi_had_conflict is now
+ * true should keep any new delegations from being hashed.
+ */
+ if (list_empty(&fp->fi_delegations))
+ fl->fl_break_time = jiffies;
+ else
+ list_for_each_entry(dp, &fp->fi_delegations, dl_perfile)
+ nfsd_break_one_deleg(dp);
+ spin_unlock(&fp->fi_lock);
}
static
@@ -2904,6 +3446,42 @@ static __be32 nfsd4_check_seqid(struct nfsd4_compound_state *cstate, struct nfs4
return nfserr_bad_seqid;
}
+static __be32 lookup_clientid(clientid_t *clid,
+ struct nfsd4_compound_state *cstate,
+ struct nfsd_net *nn)
+{
+ struct nfs4_client *found;
+
+ if (cstate->clp) {
+ found = cstate->clp;
+ if (!same_clid(&found->cl_clientid, clid))
+ return nfserr_stale_clientid;
+ return nfs_ok;
+ }
+
+ if (STALE_CLIENTID(clid, nn))
+ return nfserr_stale_clientid;
+
+ /*
+ * For v4.1+ we get the client in the SEQUENCE op. If we don't have one
+ * cached already then we know this is for is for v4.0 and "sessions"
+ * will be false.
+ */
+ WARN_ON_ONCE(cstate->session);
+ spin_lock(&nn->client_lock);
+ found = find_confirmed_client(clid, false, nn);
+ if (!found) {
+ spin_unlock(&nn->client_lock);
+ return nfserr_expired;
+ }
+ atomic_inc(&found->cl_refcount);
+ spin_unlock(&nn->client_lock);
+
+ /* Cache the nfs4_client in cstate! */
+ cstate->clp = found;
+ return nfs_ok;
+}
+
__be32
nfsd4_process_open1(struct nfsd4_compound_state *cstate,
struct nfsd4_open *open, struct nfsd_net *nn)
@@ -2924,19 +3502,19 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate,
if (open->op_file == NULL)
return nfserr_jukebox;
- strhashval = ownerstr_hashval(clientid->cl_id, &open->op_owner);
- oo = find_openstateowner_str(strhashval, open, cstate->minorversion, nn);
+ status = lookup_clientid(clientid, cstate, nn);
+ if (status)
+ return status;
+ clp = cstate->clp;
+
+ strhashval = ownerstr_hashval(&open->op_owner);
+ oo = find_openstateowner_str(strhashval, open, clp);
open->op_openowner = oo;
if (!oo) {
- clp = find_confirmed_client(clientid, cstate->minorversion,
- nn);
- if (clp == NULL)
- return nfserr_expired;
goto new_owner;
}
if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) {
/* Replace unconfirmed owners without checking for replay. */
- clp = oo->oo_owner.so_client;
release_openowner(oo);
open->op_openowner = NULL;
goto new_owner;
@@ -2944,15 +3522,14 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate,
status = nfsd4_check_seqid(cstate, &oo->oo_owner, open->op_seqid);
if (status)
return status;
- clp = oo->oo_owner.so_client;
goto alloc_stateid;
new_owner:
- oo = alloc_init_open_stateowner(strhashval, clp, open);
+ oo = alloc_init_open_stateowner(strhashval, open, cstate);
if (oo == NULL)
return nfserr_jukebox;
open->op_openowner = oo;
alloc_stateid:
- open->op_stp = nfs4_alloc_stateid(clp);
+ open->op_stp = nfs4_alloc_open_stateid(clp);
if (!open->op_stp)
return nfserr_jukebox;
return nfs_ok;
@@ -2994,14 +3571,18 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
{
int flags;
__be32 status = nfserr_bad_stateid;
+ struct nfs4_delegation *deleg;
- *dp = find_deleg_stateid(cl, &open->op_delegate_stateid);
- if (*dp == NULL)
+ deleg = find_deleg_stateid(cl, &open->op_delegate_stateid);
+ if (deleg == NULL)
goto out;
flags = share_access_to_flags(open->op_share_access);
- status = nfs4_check_delegmode(*dp, flags);
- if (status)
- *dp = NULL;
+ status = nfs4_check_delegmode(deleg, flags);
+ if (status) {
+ nfs4_put_stid(&deleg->dl_stid);
+ goto out;
+ }
+ *dp = deleg;
out:
if (!nfsd4_is_deleg_cur(open))
return nfs_ok;
@@ -3011,24 +3592,25 @@ out:
return nfs_ok;
}
-static __be32
-nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_ol_stateid **stpp)
+static struct nfs4_ol_stateid *
+nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
{
- struct nfs4_ol_stateid *local;
+ struct nfs4_ol_stateid *local, *ret = NULL;
struct nfs4_openowner *oo = open->op_openowner;
+ spin_lock(&fp->fi_lock);
list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
/* ignore lock owners */
if (local->st_stateowner->so_is_open_owner == 0)
continue;
- /* remember if we have seen this open owner */
- if (local->st_stateowner == &oo->oo_owner)
- *stpp = local;
- /* check for conflicting share reservations */
- if (!test_share(local, open))
- return nfserr_share_denied;
+ if (local->st_stateowner == &oo->oo_owner) {
+ ret = local;
+ atomic_inc(&ret->st_stid.sc_count);
+ break;
+ }
}
- return nfs_ok;
+ spin_unlock(&fp->fi_lock);
+ return ret;
}
static inline int nfs4_access_to_access(u32 nfs4_access)
@@ -3042,24 +3624,6 @@ static inline int nfs4_access_to_access(u32 nfs4_access)
return flags;
}
-static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
- struct svc_fh *cur_fh, struct nfsd4_open *open)
-{
- __be32 status;
- int oflag = nfs4_access_to_omode(open->op_share_access);
- int access = nfs4_access_to_access(open->op_share_access);
-
- if (!fp->fi_fds[oflag]) {
- status = nfsd_open(rqstp, cur_fh, S_IFREG, access,
- &fp->fi_fds[oflag]);
- if (status)
- return status;
- }
- nfs4_file_get_access(fp, oflag);
-
- return nfs_ok;
-}
-
static inline __be32
nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
struct nfsd4_open *open)
@@ -3075,34 +3639,99 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
return nfsd_setattr(rqstp, fh, &iattr, 0, (time_t)0);
}
-static __be32
-nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open)
+static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
+ struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp,
+ struct nfsd4_open *open)
{
- u32 op_share_access = open->op_share_access;
- bool new_access;
+ struct file *filp = NULL;
__be32 status;
+ int oflag = nfs4_access_to_omode(open->op_share_access);
+ int access = nfs4_access_to_access(open->op_share_access);
+ unsigned char old_access_bmap, old_deny_bmap;
- new_access = !test_access(op_share_access, stp);
- if (new_access) {
- status = nfs4_get_vfs_file(rqstp, fp, cur_fh, open);
- if (status)
- return status;
+ spin_lock(&fp->fi_lock);
+
+ /*
+ * Are we trying to set a deny mode that would conflict with
+ * current access?
+ */
+ status = nfs4_file_check_deny(fp, open->op_share_deny);
+ if (status != nfs_ok) {
+ spin_unlock(&fp->fi_lock);
+ goto out;
}
- status = nfsd4_truncate(rqstp, cur_fh, open);
- if (status) {
- if (new_access) {
- int oflag = nfs4_access_to_omode(op_share_access);
- nfs4_file_put_access(fp, oflag);
- }
- return status;
+
+ /* set access to the file */
+ status = nfs4_file_get_access(fp, open->op_share_access);
+ if (status != nfs_ok) {
+ spin_unlock(&fp->fi_lock);
+ goto out;
}
- /* remember the open */
- set_access(op_share_access, stp);
+
+ /* Set access bits in stateid */
+ old_access_bmap = stp->st_access_bmap;
+ set_access(open->op_share_access, stp);
+
+ /* Set new deny mask */
+ old_deny_bmap = stp->st_deny_bmap;
set_deny(open->op_share_deny, stp);
+ fp->fi_share_deny |= (open->op_share_deny & NFS4_SHARE_DENY_BOTH);
- return nfs_ok;
+ if (!fp->fi_fds[oflag]) {
+ spin_unlock(&fp->fi_lock);
+ status = nfsd_open(rqstp, cur_fh, S_IFREG, access, &filp);
+ if (status)
+ goto out_put_access;
+ spin_lock(&fp->fi_lock);
+ if (!fp->fi_fds[oflag]) {
+ fp->fi_fds[oflag] = filp;
+ filp = NULL;
+ }
+ }
+ spin_unlock(&fp->fi_lock);
+ if (filp)
+ fput(filp);
+
+ status = nfsd4_truncate(rqstp, cur_fh, open);
+ if (status)
+ goto out_put_access;
+out:
+ return status;
+out_put_access:
+ stp->st_access_bmap = old_access_bmap;
+ nfs4_file_put_access(fp, open->op_share_access);
+ reset_union_bmap_deny(bmap_to_share_mode(old_deny_bmap), stp);
+ goto out;
}
+static __be32
+nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open)
+{
+ __be32 status;
+ unsigned char old_deny_bmap;
+
+ if (!test_access(open->op_share_access, stp))
+ return nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open);
+
+ /* test and set deny mode */
+ spin_lock(&fp->fi_lock);
+ status = nfs4_file_check_deny(fp, open->op_share_deny);
+ if (status == nfs_ok) {
+ old_deny_bmap = stp->st_deny_bmap;
+ set_deny(open->op_share_deny, stp);
+ fp->fi_share_deny |=
+ (open->op_share_deny & NFS4_SHARE_DENY_BOTH);
+ }
+ spin_unlock(&fp->fi_lock);
+
+ if (status != nfs_ok)
+ return status;
+
+ status = nfsd4_truncate(rqstp, cur_fh, open);
+ if (status != nfs_ok)
+ reset_union_bmap_deny(old_deny_bmap, stp);
+ return status;
+}
static void
nfs4_set_claim_prev(struct nfsd4_open *open, bool has_session)
@@ -3123,7 +3752,7 @@ static bool nfsd4_cb_channel_good(struct nfs4_client *clp)
return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN;
}
-static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int flag)
+static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag)
{
struct file_lock *fl;
@@ -3135,53 +3764,101 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int f
fl->fl_flags = FL_DELEG;
fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
fl->fl_end = OFFSET_MAX;
- fl->fl_owner = (fl_owner_t)(dp->dl_file);
+ fl->fl_owner = (fl_owner_t)fp;
fl->fl_pid = current->tgid;
return fl;
}
static int nfs4_setlease(struct nfs4_delegation *dp)
{
- struct nfs4_file *fp = dp->dl_file;
+ struct nfs4_file *fp = dp->dl_stid.sc_file;
struct file_lock *fl;
- int status;
+ struct file *filp;
+ int status = 0;
- fl = nfs4_alloc_init_lease(dp, NFS4_OPEN_DELEGATE_READ);
+ fl = nfs4_alloc_init_lease(fp, NFS4_OPEN_DELEGATE_READ);
if (!fl)
return -ENOMEM;
- fl->fl_file = find_readable_file(fp);
- status = vfs_setlease(fl->fl_file, fl->fl_type, &fl);
- if (status)
- goto out_free;
+ filp = find_readable_file(fp);
+ if (!filp) {
+ /* We should always have a readable file here */
+ WARN_ON_ONCE(1);
+ return -EBADF;
+ }
+ fl->fl_file = filp;
+ status = vfs_setlease(filp, fl->fl_type, &fl);
+ if (status) {
+ locks_free_lock(fl);
+ goto out_fput;
+ }
+ spin_lock(&state_lock);
+ spin_lock(&fp->fi_lock);
+ /* Did the lease get broken before we took the lock? */
+ status = -EAGAIN;
+ if (fp->fi_had_conflict)
+ goto out_unlock;
+ /* Race breaker */
+ if (fp->fi_lease) {
+ status = 0;
+ atomic_inc(&fp->fi_delegees);
+ hash_delegation_locked(dp, fp);
+ goto out_unlock;
+ }
fp->fi_lease = fl;
- fp->fi_deleg_file = get_file(fl->fl_file);
+ fp->fi_deleg_file = filp;
atomic_set(&fp->fi_delegees, 1);
- spin_lock(&state_lock);
hash_delegation_locked(dp, fp);
+ spin_unlock(&fp->fi_lock);
spin_unlock(&state_lock);
return 0;
-out_free:
- locks_free_lock(fl);
+out_unlock:
+ spin_unlock(&fp->fi_lock);
+ spin_unlock(&state_lock);
+out_fput:
+ fput(filp);
return status;
}
-static int nfs4_set_delegation(struct nfs4_delegation *dp, struct nfs4_file *fp)
+static struct nfs4_delegation *
+nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
+ struct nfs4_file *fp)
{
+ int status;
+ struct nfs4_delegation *dp;
+
if (fp->fi_had_conflict)
- return -EAGAIN;
+ return ERR_PTR(-EAGAIN);
+
+ dp = alloc_init_deleg(clp, fh);
+ if (!dp)
+ return ERR_PTR(-ENOMEM);
+
get_nfs4_file(fp);
- dp->dl_file = fp;
- if (!fp->fi_lease)
- return nfs4_setlease(dp);
spin_lock(&state_lock);
+ spin_lock(&fp->fi_lock);
+ dp->dl_stid.sc_file = fp;
+ if (!fp->fi_lease) {
+ spin_unlock(&fp->fi_lock);
+ spin_unlock(&state_lock);
+ status = nfs4_setlease(dp);
+ goto out;
+ }
atomic_inc(&fp->fi_delegees);
if (fp->fi_had_conflict) {
- spin_unlock(&state_lock);
- return -EAGAIN;
+ status = -EAGAIN;
+ goto out_unlock;
}
hash_delegation_locked(dp, fp);
+ status = 0;
+out_unlock:
+ spin_unlock(&fp->fi_lock);
spin_unlock(&state_lock);
- return 0;
+out:
+ if (status) {
+ nfs4_put_stid(&dp->dl_stid);
+ return ERR_PTR(status);
+ }
+ return dp;
}
static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
@@ -3212,11 +3889,12 @@ static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
* proper support for them.
*/
static void
-nfs4_open_delegation(struct net *net, struct svc_fh *fh,
- struct nfsd4_open *open, struct nfs4_ol_stateid *stp)
+nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open,
+ struct nfs4_ol_stateid *stp)
{
struct nfs4_delegation *dp;
- struct nfs4_openowner *oo = container_of(stp->st_stateowner, struct nfs4_openowner, oo_owner);
+ struct nfs4_openowner *oo = openowner(stp->st_stateowner);
+ struct nfs4_client *clp = stp->st_stid.sc_client;
int cb_up;
int status = 0;
@@ -3235,7 +3913,7 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh,
* Let's not give out any delegations till everyone's
* had the chance to reclaim theirs....
*/
- if (locks_in_grace(net))
+ if (locks_in_grace(clp->net))
goto out_no_deleg;
if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED))
goto out_no_deleg;
@@ -3254,21 +3932,17 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh,
default:
goto out_no_deleg;
}
- dp = alloc_init_deleg(oo->oo_owner.so_client, stp, fh);
- if (dp == NULL)
+ dp = nfs4_set_delegation(clp, fh, stp->st_stid.sc_file);
+ if (IS_ERR(dp))
goto out_no_deleg;
- status = nfs4_set_delegation(dp, stp->st_file);
- if (status)
- goto out_free;
memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid));
dprintk("NFSD: delegation stateid=" STATEID_FMT "\n",
STATEID_VAL(&dp->dl_stid.sc_stateid));
open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
+ nfs4_put_stid(&dp->dl_stid);
return;
-out_free:
- destroy_delegation(dp);
out_no_deleg:
open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE;
if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS &&
@@ -3301,16 +3975,12 @@ static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open,
*/
}
-/*
- * called with nfs4_lock_state() held.
- */
__be32
nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
{
struct nfsd4_compoundres *resp = rqstp->rq_resp;
struct nfs4_client *cl = open->op_openowner->oo_owner.so_client;
struct nfs4_file *fp = NULL;
- struct inode *ino = current_fh->fh_dentry->d_inode;
struct nfs4_ol_stateid *stp = NULL;
struct nfs4_delegation *dp = NULL;
__be32 status;
@@ -3320,21 +3990,18 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
* and check for delegations in the process of being recalled.
* If not found, create the nfs4_file struct
*/
- fp = find_file(ino);
- if (fp) {
- if ((status = nfs4_check_open(fp, open, &stp)))
- goto out;
+ fp = find_or_add_file(open->op_file, &current_fh->fh_handle);
+ if (fp != open->op_file) {
status = nfs4_check_deleg(cl, open, &dp);
if (status)
goto out;
+ stp = nfsd4_find_existing_open(fp, open);
} else {
+ open->op_file = NULL;
status = nfserr_bad_stateid;
if (nfsd4_is_deleg_cur(open))
goto out;
status = nfserr_jukebox;
- fp = open->op_file;
- open->op_file = NULL;
- nfsd4_init_file(fp, ino);
}
/*
@@ -3347,22 +4014,19 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
if (status)
goto out;
} else {
- status = nfs4_get_vfs_file(rqstp, fp, current_fh, open);
- if (status)
- goto out;
- status = nfsd4_truncate(rqstp, current_fh, open);
- if (status)
- goto out;
stp = open->op_stp;
open->op_stp = NULL;
init_open_stateid(stp, fp, open);
+ status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open);
+ if (status) {
+ release_open_stateid(stp);
+ goto out;
+ }
}
update_stateid(&stp->st_stid.sc_stateid);
memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
if (nfsd4_has_session(&resp->cstate)) {
- open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
-
if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) {
open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
open->op_why_no_deleg = WND4_NOT_WANTED;
@@ -3374,7 +4038,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
* Attempt to hand out a delegation. No error return, because the
* OPEN succeeds even if we fail.
*/
- nfs4_open_delegation(SVC_NET(rqstp), current_fh, open, stp);
+ nfs4_open_delegation(current_fh, open, stp);
nodeleg:
status = nfs_ok;
@@ -3397,41 +4061,27 @@ out:
if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED) &&
!nfsd4_has_session(&resp->cstate))
open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM;
+ if (dp)
+ nfs4_put_stid(&dp->dl_stid);
+ if (stp)
+ nfs4_put_stid(&stp->st_stid);
return status;
}
-void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status)
+void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
+ struct nfsd4_open *open, __be32 status)
{
if (open->op_openowner) {
- struct nfs4_openowner *oo = open->op_openowner;
+ struct nfs4_stateowner *so = &open->op_openowner->oo_owner;
- if (!list_empty(&oo->oo_owner.so_stateids))
- list_del_init(&oo->oo_close_lru);
- if (oo->oo_flags & NFS4_OO_NEW) {
- if (status) {
- release_openowner(oo);
- open->op_openowner = NULL;
- } else
- oo->oo_flags &= ~NFS4_OO_NEW;
- }
+ nfsd4_cstate_assign_replay(cstate, so);
+ nfs4_put_stateowner(so);
}
if (open->op_file)
nfsd4_free_file(open->op_file);
if (open->op_stp)
- free_generic_stateid(open->op_stp);
-}
-
-static __be32 lookup_clientid(clientid_t *clid, bool session, struct nfsd_net *nn, struct nfs4_client **clp)
-{
- struct nfs4_client *found;
-
- if (STALE_CLIENTID(clid, nn))
- return nfserr_stale_clientid;
- found = find_confirmed_client(clid, session, nn);
- if (clp)
- *clp = found;
- return found ? nfs_ok : nfserr_expired;
+ nfs4_put_stid(&open->op_stp->st_stid);
}
__be32
@@ -3442,19 +4092,18 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
__be32 status;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
- nfs4_lock_state();
dprintk("process_renew(%08x/%08x): starting\n",
clid->cl_boot, clid->cl_id);
- status = lookup_clientid(clid, cstate->minorversion, nn, &clp);
+ status = lookup_clientid(clid, cstate, nn);
if (status)
goto out;
+ clp = cstate->clp;
status = nfserr_cb_path_down;
if (!list_empty(&clp->cl_delegations)
&& clp->cl_cb_state != NFSD4_CB_UP)
goto out;
status = nfs_ok;
out:
- nfs4_unlock_state();
return status;
}
@@ -3483,12 +4132,11 @@ nfs4_laundromat(struct nfsd_net *nn)
struct nfs4_client *clp;
struct nfs4_openowner *oo;
struct nfs4_delegation *dp;
+ struct nfs4_ol_stateid *stp;
struct list_head *pos, *next, reaplist;
time_t cutoff = get_seconds() - nn->nfsd4_lease;
time_t t, new_timeo = nn->nfsd4_lease;
- nfs4_lock_state();
-
dprintk("NFSD: laundromat service - starting\n");
nfsd4_end_grace(nn);
INIT_LIST_HEAD(&reaplist);
@@ -3505,13 +4153,14 @@ nfs4_laundromat(struct nfsd_net *nn)
clp->cl_clientid.cl_id);
continue;
}
- list_move(&clp->cl_lru, &reaplist);
+ list_add(&clp->cl_lru, &reaplist);
}
spin_unlock(&nn->client_lock);
list_for_each_safe(pos, next, &reaplist) {
clp = list_entry(pos, struct nfs4_client, cl_lru);
dprintk("NFSD: purging unused client (clientid %08x)\n",
clp->cl_clientid.cl_id);
+ list_del_init(&clp->cl_lru);
expire_client(clp);
}
spin_lock(&state_lock);
@@ -3524,24 +4173,37 @@ nfs4_laundromat(struct nfsd_net *nn)
new_timeo = min(new_timeo, t);
break;
}
- list_move(&dp->dl_recall_lru, &reaplist);
+ unhash_delegation_locked(dp);
+ list_add(&dp->dl_recall_lru, &reaplist);
}
spin_unlock(&state_lock);
- list_for_each_safe(pos, next, &reaplist) {
- dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
+ while (!list_empty(&reaplist)) {
+ dp = list_first_entry(&reaplist, struct nfs4_delegation,
+ dl_recall_lru);
+ list_del_init(&dp->dl_recall_lru);
revoke_delegation(dp);
}
- list_for_each_safe(pos, next, &nn->close_lru) {
- oo = container_of(pos, struct nfs4_openowner, oo_close_lru);
- if (time_after((unsigned long)oo->oo_time, (unsigned long)cutoff)) {
+
+ spin_lock(&nn->client_lock);
+ while (!list_empty(&nn->close_lru)) {
+ oo = list_first_entry(&nn->close_lru, struct nfs4_openowner,
+ oo_close_lru);
+ if (time_after((unsigned long)oo->oo_time,
+ (unsigned long)cutoff)) {
t = oo->oo_time - cutoff;
new_timeo = min(new_timeo, t);
break;
}
- release_openowner(oo);
+ list_del_init(&oo->oo_close_lru);
+ stp = oo->oo_last_closed_stid;
+ oo->oo_last_closed_stid = NULL;
+ spin_unlock(&nn->client_lock);
+ nfs4_put_stid(&stp->st_stid);
+ spin_lock(&nn->client_lock);
}
+ spin_unlock(&nn->client_lock);
+
new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
- nfs4_unlock_state();
return new_timeo;
}
@@ -3564,7 +4226,7 @@ laundromat_main(struct work_struct *laundry)
static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *stp)
{
- if (fhp->fh_dentry->d_inode != stp->st_file->fi_inode)
+ if (!nfsd_fh_match(&fhp->fh_handle, &stp->st_stid.sc_file->fi_fhandle))
return nfserr_bad_stateid;
return nfs_ok;
}
@@ -3666,10 +4328,10 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
{
struct nfs4_stid *s;
struct nfs4_ol_stateid *ols;
- __be32 status;
+ __be32 status = nfserr_bad_stateid;
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
- return nfserr_bad_stateid;
+ return status;
/* Client debugging aid. */
if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid)) {
char addr_str[INET6_ADDRSTRLEN];
@@ -3677,53 +4339,62 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
sizeof(addr_str));
pr_warn_ratelimited("NFSD: client %s testing state ID "
"with incorrect client ID\n", addr_str);
- return nfserr_bad_stateid;
+ return status;
}
- s = find_stateid(cl, stateid);
+ spin_lock(&cl->cl_lock);
+ s = find_stateid_locked(cl, stateid);
if (!s)
- return nfserr_bad_stateid;
+ goto out_unlock;
status = check_stateid_generation(stateid, &s->sc_stateid, 1);
if (status)
- return status;
+ goto out_unlock;
switch (s->sc_type) {
case NFS4_DELEG_STID:
- return nfs_ok;
+ status = nfs_ok;
+ break;
case NFS4_REVOKED_DELEG_STID:
- return nfserr_deleg_revoked;
+ status = nfserr_deleg_revoked;
+ break;
case NFS4_OPEN_STID:
case NFS4_LOCK_STID:
ols = openlockstateid(s);
if (ols->st_stateowner->so_is_open_owner
&& !(openowner(ols->st_stateowner)->oo_flags
& NFS4_OO_CONFIRMED))
- return nfserr_bad_stateid;
- return nfs_ok;
+ status = nfserr_bad_stateid;
+ else
+ status = nfs_ok;
+ break;
default:
printk("unknown stateid type %x\n", s->sc_type);
+ /* Fallthrough */
case NFS4_CLOSED_STID:
- return nfserr_bad_stateid;
+ case NFS4_CLOSED_DELEG_STID:
+ status = nfserr_bad_stateid;
}
+out_unlock:
+ spin_unlock(&cl->cl_lock);
+ return status;
}
-static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask,
- struct nfs4_stid **s, bool sessions,
- struct nfsd_net *nn)
+static __be32
+nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
+ stateid_t *stateid, unsigned char typemask,
+ struct nfs4_stid **s, struct nfsd_net *nn)
{
- struct nfs4_client *cl;
__be32 status;
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
return nfserr_bad_stateid;
- status = lookup_clientid(&stateid->si_opaque.so_clid, sessions,
- nn, &cl);
+ status = lookup_clientid(&stateid->si_opaque.so_clid, cstate, nn);
if (status == nfserr_stale_clientid) {
- if (sessions)
+ if (cstate->session)
return nfserr_bad_stateid;
return nfserr_stale_stateid;
}
if (status)
return status;
- *s = find_stateid_by_type(cl, stateid, typemask);
+ *s = find_stateid_by_type(cstate->clp, stateid, typemask);
if (!*s)
return nfserr_bad_stateid;
return nfs_ok;
@@ -3754,12 +4425,11 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
return check_special_stateids(net, current_fh, stateid, flags);
- nfs4_lock_state();
-
- status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
- &s, cstate->minorversion, nn);
+ status = nfsd4_lookup_stateid(cstate, stateid,
+ NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
+ &s, nn);
if (status)
- goto out;
+ return status;
status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate));
if (status)
goto out;
@@ -3770,12 +4440,13 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
if (status)
goto out;
if (filpp) {
- file = dp->dl_file->fi_deleg_file;
+ file = dp->dl_stid.sc_file->fi_deleg_file;
if (!file) {
WARN_ON_ONCE(1);
status = nfserr_serverfault;
goto out;
}
+ get_file(file);
}
break;
case NFS4_OPEN_STID:
@@ -3791,10 +4462,12 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
if (status)
goto out;
if (filpp) {
+ struct nfs4_file *fp = stp->st_stid.sc_file;
+
if (flags & RD_STATE)
- file = find_readable_file(stp->st_file);
+ file = find_readable_file(fp);
else
- file = find_writeable_file(stp->st_file);
+ file = find_writeable_file(fp);
}
break;
default:
@@ -3803,28 +4476,12 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
}
status = nfs_ok;
if (file)
- *filpp = get_file(file);
+ *filpp = file;
out:
- nfs4_unlock_state();
+ nfs4_put_stid(s);
return status;
}
-static __be32
-nfsd4_free_lock_stateid(struct nfs4_ol_stateid *stp)
-{
- struct nfs4_lockowner *lo = lockowner(stp->st_stateowner);
-
- if (check_for_locks(stp->st_file, lo))
- return nfserr_locks_held;
- /*
- * Currently there's a 1-1 lock stateid<->lockowner
- * correspondance, and we have to delete the lockowner when we
- * delete the lock stateid:
- */
- release_lockowner(lo);
- return nfs_ok;
-}
-
/*
* Test if the stateid is valid
*/
@@ -3835,11 +4492,9 @@ nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_test_stateid_id *stateid;
struct nfs4_client *cl = cstate->session->se_client;
- nfs4_lock_state();
list_for_each_entry(stateid, &test_stateid->ts_stateid_list, ts_id_list)
stateid->ts_id_status =
nfsd4_validate_stateid(cl, &stateid->ts_id_stateid);
- nfs4_unlock_state();
return nfs_ok;
}
@@ -3851,37 +4506,50 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
stateid_t *stateid = &free_stateid->fr_stateid;
struct nfs4_stid *s;
struct nfs4_delegation *dp;
+ struct nfs4_ol_stateid *stp;
struct nfs4_client *cl = cstate->session->se_client;
__be32 ret = nfserr_bad_stateid;
- nfs4_lock_state();
- s = find_stateid(cl, stateid);
+ spin_lock(&cl->cl_lock);
+ s = find_stateid_locked(cl, stateid);
if (!s)
- goto out;
+ goto out_unlock;
switch (s->sc_type) {
case NFS4_DELEG_STID:
ret = nfserr_locks_held;
- goto out;
+ break;
case NFS4_OPEN_STID:
- case NFS4_LOCK_STID:
ret = check_stateid_generation(stateid, &s->sc_stateid, 1);
if (ret)
- goto out;
- if (s->sc_type == NFS4_LOCK_STID)
- ret = nfsd4_free_lock_stateid(openlockstateid(s));
- else
- ret = nfserr_locks_held;
+ break;
+ ret = nfserr_locks_held;
break;
+ case NFS4_LOCK_STID:
+ ret = check_stateid_generation(stateid, &s->sc_stateid, 1);
+ if (ret)
+ break;
+ stp = openlockstateid(s);
+ ret = nfserr_locks_held;
+ if (check_for_locks(stp->st_stid.sc_file,
+ lockowner(stp->st_stateowner)))
+ break;
+ unhash_lock_stateid(stp);
+ spin_unlock(&cl->cl_lock);
+ nfs4_put_stid(s);
+ ret = nfs_ok;
+ goto out;
case NFS4_REVOKED_DELEG_STID:
dp = delegstateid(s);
- destroy_revoked_delegation(dp);
+ list_del_init(&dp->dl_recall_lru);
+ spin_unlock(&cl->cl_lock);
+ nfs4_put_stid(s);
ret = nfs_ok;
- break;
- default:
- ret = nfserr_bad_stateid;
+ goto out;
+ /* Default falls through and returns nfserr_bad_stateid */
}
+out_unlock:
+ spin_unlock(&cl->cl_lock);
out:
- nfs4_unlock_state();
return ret;
}
@@ -3926,20 +4594,24 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
{
__be32 status;
struct nfs4_stid *s;
+ struct nfs4_ol_stateid *stp = NULL;
dprintk("NFSD: %s: seqid=%d stateid = " STATEID_FMT "\n", __func__,
seqid, STATEID_VAL(stateid));
*stpp = NULL;
- status = nfsd4_lookup_stateid(stateid, typemask, &s,
- cstate->minorversion, nn);
+ status = nfsd4_lookup_stateid(cstate, stateid, typemask, &s, nn);
if (status)
return status;
- *stpp = openlockstateid(s);
- if (!nfsd4_has_session(cstate))
- cstate->replay_owner = (*stpp)->st_stateowner;
+ stp = openlockstateid(s);
+ nfsd4_cstate_assign_replay(cstate, stp->st_stateowner);
- return nfs4_seqid_op_checks(cstate, stateid, seqid, *stpp);
+ status = nfs4_seqid_op_checks(cstate, stateid, seqid, stp);
+ if (!status)
+ *stpp = stp;
+ else
+ nfs4_put_stid(&stp->st_stid);
+ return status;
}
static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
@@ -3947,14 +4619,18 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs
{
__be32 status;
struct nfs4_openowner *oo;
+ struct nfs4_ol_stateid *stp;
status = nfs4_preprocess_seqid_op(cstate, seqid, stateid,
- NFS4_OPEN_STID, stpp, nn);
+ NFS4_OPEN_STID, &stp, nn);
if (status)
return status;
- oo = openowner((*stpp)->st_stateowner);
- if (!(oo->oo_flags & NFS4_OO_CONFIRMED))
+ oo = openowner(stp->st_stateowner);
+ if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) {
+ nfs4_put_stid(&stp->st_stid);
return nfserr_bad_stateid;
+ }
+ *stpp = stp;
return nfs_ok;
}
@@ -3974,8 +4650,6 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
return status;
- nfs4_lock_state();
-
status = nfs4_preprocess_seqid_op(cstate,
oc->oc_seqid, &oc->oc_req_stateid,
NFS4_OPEN_STID, &stp, nn);
@@ -3984,7 +4658,7 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
oo = openowner(stp->st_stateowner);
status = nfserr_bad_stateid;
if (oo->oo_flags & NFS4_OO_CONFIRMED)
- goto out;
+ goto put_stateid;
oo->oo_flags |= NFS4_OO_CONFIRMED;
update_stateid(&stp->st_stid.sc_stateid);
memcpy(&oc->oc_resp_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
@@ -3993,10 +4667,10 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
nfsd4_client_record_create(oo->oo_owner.so_client);
status = nfs_ok;
+put_stateid:
+ nfs4_put_stid(&stp->st_stid);
out:
nfsd4_bump_seqid(cstate, status);
- if (!cstate->replay_owner)
- nfs4_unlock_state();
return status;
}
@@ -4004,7 +4678,7 @@ static inline void nfs4_stateid_downgrade_bit(struct nfs4_ol_stateid *stp, u32 a
{
if (!test_access(access, stp))
return;
- nfs4_file_put_access(stp->st_file, nfs4_access_to_omode(access));
+ nfs4_file_put_access(stp->st_stid.sc_file, access);
clear_access(access, stp);
}
@@ -4026,16 +4700,6 @@ static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_ac
}
}
-static void
-reset_union_bmap_deny(unsigned long deny, struct nfs4_ol_stateid *stp)
-{
- int i;
- for (i = 0; i < 4; i++) {
- if ((i & deny) != i)
- clear_deny(i, stp);
- }
-}
-
__be32
nfsd4_open_downgrade(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate,
@@ -4053,21 +4717,20 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
dprintk("NFSD: %s: od_deleg_want=0x%x ignored\n", __func__,
od->od_deleg_want);
- nfs4_lock_state();
status = nfs4_preprocess_confirmed_seqid_op(cstate, od->od_seqid,
&od->od_stateid, &stp, nn);
if (status)
goto out;
status = nfserr_inval;
if (!test_access(od->od_share_access, stp)) {
- dprintk("NFSD: access not a subset current bitmap: 0x%lx, input access=%08x\n",
+ dprintk("NFSD: access not a subset of current bitmap: 0x%hhx, input access=%08x\n",
stp->st_access_bmap, od->od_share_access);
- goto out;
+ goto put_stateid;
}
if (!test_deny(od->od_share_deny, stp)) {
- dprintk("NFSD:deny not a subset current bitmap: 0x%lx, input deny=%08x\n",
+ dprintk("NFSD: deny not a subset of current bitmap: 0x%hhx, input deny=%08x\n",
stp->st_deny_bmap, od->od_share_deny);
- goto out;
+ goto put_stateid;
}
nfs4_stateid_downgrade(stp, od->od_share_access);
@@ -4076,17 +4739,31 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
update_stateid(&stp->st_stid.sc_stateid);
memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
status = nfs_ok;
+put_stateid:
+ nfs4_put_stid(&stp->st_stid);
out:
nfsd4_bump_seqid(cstate, status);
- if (!cstate->replay_owner)
- nfs4_unlock_state();
return status;
}
static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
{
- unhash_open_stateid(s);
+ struct nfs4_client *clp = s->st_stid.sc_client;
+ LIST_HEAD(reaplist);
+
s->st_stid.sc_type = NFS4_CLOSED_STID;
+ spin_lock(&clp->cl_lock);
+ unhash_open_stateid(s, &reaplist);
+
+ if (clp->cl_minorversion) {
+ put_ol_stateid_locked(s, &reaplist);
+ spin_unlock(&clp->cl_lock);
+ free_ol_stateid_reaplist(&reaplist);
+ } else {
+ spin_unlock(&clp->cl_lock);
+ free_ol_stateid_reaplist(&reaplist);
+ move_to_close_lru(s, clp->net);
+ }
}
/*
@@ -4097,7 +4774,6 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_close *close)
{
__be32 status;
- struct nfs4_openowner *oo;
struct nfs4_ol_stateid *stp;
struct net *net = SVC_NET(rqstp);
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
@@ -4105,7 +4781,6 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
dprintk("NFSD: nfsd4_close on file %pd\n",
cstate->current_fh.fh_dentry);
- nfs4_lock_state();
status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid,
&close->cl_stateid,
NFS4_OPEN_STID|NFS4_CLOSED_STID,
@@ -4113,31 +4788,14 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
nfsd4_bump_seqid(cstate, status);
if (status)
goto out;
- oo = openowner(stp->st_stateowner);
update_stateid(&stp->st_stid.sc_stateid);
memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
nfsd4_close_open_stateid(stp);
- if (cstate->minorversion)
- free_generic_stateid(stp);
- else
- oo->oo_last_closed_stid = stp;
-
- if (list_empty(&oo->oo_owner.so_stateids)) {
- if (cstate->minorversion)
- release_openowner(oo);
- else {
- /*
- * In the 4.0 case we need to keep the owners around a
- * little while to handle CLOSE replay.
- */
- move_to_close_lru(oo, SVC_NET(rqstp));
- }
- }
+ /* put reference from nfs4_preprocess_seqid_op */
+ nfs4_put_stid(&stp->st_stid);
out:
- if (!cstate->replay_owner)
- nfs4_unlock_state();
return status;
}
@@ -4154,28 +4812,24 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0)))
return status;
- nfs4_lock_state();
- status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID, &s,
- cstate->minorversion, nn);
+ status = nfsd4_lookup_stateid(cstate, stateid, NFS4_DELEG_STID, &s, nn);
if (status)
goto out;
dp = delegstateid(s);
status = check_stateid_generation(stateid, &dp->dl_stid.sc_stateid, nfsd4_has_session(cstate));
if (status)
- goto out;
+ goto put_stateid;
destroy_delegation(dp);
+put_stateid:
+ nfs4_put_stid(&dp->dl_stid);
out:
- nfs4_unlock_state();
-
return status;
}
#define LOFF_OVERFLOW(start, len) ((u64)(len) > ~(u64)(start))
-#define LOCKOWNER_INO_HASH_MASK (LOCKOWNER_INO_HASH_SIZE - 1)
-
static inline u64
end_offset(u64 start, u64 len)
{
@@ -4196,13 +4850,6 @@ last_byte_offset(u64 start, u64 len)
return end > start ? end - 1: NFS4_MAX_UINT64;
}
-static unsigned int lockowner_ino_hashval(struct inode *inode, u32 cl_id, struct xdr_netobj *ownername)
-{
- return (file_hashval(inode) + cl_id
- + opaque_hashval(ownername->data, ownername->len))
- & LOCKOWNER_INO_HASH_MASK;
-}
-
/*
* TODO: Linux file offsets are _signed_ 64-bit quantities, which means that
* we can't properly handle lock requests that go beyond the (2^63 - 1)-th
@@ -4255,47 +4902,56 @@ nevermind:
deny->ld_type = NFS4_WRITE_LT;
}
-static bool same_lockowner_ino(struct nfs4_lockowner *lo, struct inode *inode, clientid_t *clid, struct xdr_netobj *owner)
+static struct nfs4_lockowner *
+find_lockowner_str_locked(clientid_t *clid, struct xdr_netobj *owner,
+ struct nfs4_client *clp)
{
- struct nfs4_ol_stateid *lst;
+ unsigned int strhashval = ownerstr_hashval(owner);
+ struct nfs4_stateowner *so;
- if (!same_owner_str(&lo->lo_owner, owner, clid))
- return false;
- if (list_empty(&lo->lo_owner.so_stateids)) {
- WARN_ON_ONCE(1);
- return false;
+ lockdep_assert_held(&clp->cl_lock);
+
+ list_for_each_entry(so, &clp->cl_ownerstr_hashtbl[strhashval],
+ so_strhash) {
+ if (so->so_is_open_owner)
+ continue;
+ if (!same_owner_str(so, owner))
+ continue;
+ atomic_inc(&so->so_count);
+ return lockowner(so);
}
- lst = list_first_entry(&lo->lo_owner.so_stateids,
- struct nfs4_ol_stateid, st_perstateowner);
- return lst->st_file->fi_inode == inode;
+ return NULL;
}
static struct nfs4_lockowner *
-find_lockowner_str(struct inode *inode, clientid_t *clid,
- struct xdr_netobj *owner, struct nfsd_net *nn)
+find_lockowner_str(clientid_t *clid, struct xdr_netobj *owner,
+ struct nfs4_client *clp)
{
- unsigned int hashval = lockowner_ino_hashval(inode, clid->cl_id, owner);
struct nfs4_lockowner *lo;
- list_for_each_entry(lo, &nn->lockowner_ino_hashtbl[hashval], lo_owner_ino_hash) {
- if (same_lockowner_ino(lo, inode, clid, owner))
- return lo;
- }
- return NULL;
+ spin_lock(&clp->cl_lock);
+ lo = find_lockowner_str_locked(clid, owner, clp);
+ spin_unlock(&clp->cl_lock);
+ return lo;
}
-static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp)
+static void nfs4_unhash_lockowner(struct nfs4_stateowner *sop)
{
- struct inode *inode = open_stp->st_file->fi_inode;
- unsigned int inohash = lockowner_ino_hashval(inode,
- clp->cl_clientid.cl_id, &lo->lo_owner.so_owner);
- struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ unhash_lockowner_locked(lockowner(sop));
+}
+
+static void nfs4_free_lockowner(struct nfs4_stateowner *sop)
+{
+ struct nfs4_lockowner *lo = lockowner(sop);
- list_add(&lo->lo_owner.so_strhash, &nn->ownerstr_hashtbl[strhashval]);
- list_add(&lo->lo_owner_ino_hash, &nn->lockowner_ino_hashtbl[inohash]);
- list_add(&lo->lo_perstateid, &open_stp->st_lockowners);
+ kmem_cache_free(lockowner_slab, lo);
}
+static const struct nfs4_stateowner_operations lockowner_ops = {
+ .so_unhash = nfs4_unhash_lockowner,
+ .so_free = nfs4_free_lockowner,
+};
+
/*
* Alloc a lock owner structure.
* Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has
@@ -4303,42 +4959,107 @@ static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, s
*
* strhashval = ownerstr_hashval
*/
-
static struct nfs4_lockowner *
-alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp, struct nfsd4_lock *lock) {
- struct nfs4_lockowner *lo;
+alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp,
+ struct nfs4_ol_stateid *open_stp,
+ struct nfsd4_lock *lock)
+{
+ struct nfs4_lockowner *lo, *ret;
lo = alloc_stateowner(lockowner_slab, &lock->lk_new_owner, clp);
if (!lo)
return NULL;
INIT_LIST_HEAD(&lo->lo_owner.so_stateids);
lo->lo_owner.so_is_open_owner = 0;
- /* It is the openowner seqid that will be incremented in encode in the
- * case of new lockowners; so increment the lock seqid manually: */
- lo->lo_owner.so_seqid = lock->lk_new_lock_seqid + 1;
- hash_lockowner(lo, strhashval, clp, open_stp);
+ lo->lo_owner.so_seqid = lock->lk_new_lock_seqid;
+ lo->lo_owner.so_ops = &lockowner_ops;
+ spin_lock(&clp->cl_lock);
+ ret = find_lockowner_str_locked(&clp->cl_clientid,
+ &lock->lk_new_owner, clp);
+ if (ret == NULL) {
+ list_add(&lo->lo_owner.so_strhash,
+ &clp->cl_ownerstr_hashtbl[strhashval]);
+ ret = lo;
+ } else
+ nfs4_free_lockowner(&lo->lo_owner);
+ spin_unlock(&clp->cl_lock);
return lo;
}
-static struct nfs4_ol_stateid *
-alloc_init_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp, struct nfs4_ol_stateid *open_stp)
+static void
+init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo,
+ struct nfs4_file *fp, struct inode *inode,
+ struct nfs4_ol_stateid *open_stp)
{
- struct nfs4_ol_stateid *stp;
struct nfs4_client *clp = lo->lo_owner.so_client;
- stp = nfs4_alloc_stateid(clp);
- if (stp == NULL)
- return NULL;
+ lockdep_assert_held(&clp->cl_lock);
+
+ atomic_inc(&stp->st_stid.sc_count);
stp->st_stid.sc_type = NFS4_LOCK_STID;
- list_add(&stp->st_perfile, &fp->fi_stateids);
- list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids);
stp->st_stateowner = &lo->lo_owner;
+ atomic_inc(&lo->lo_owner.so_count);
get_nfs4_file(fp);
- stp->st_file = fp;
+ stp->st_stid.sc_file = fp;
+ stp->st_stid.sc_free = nfs4_free_lock_stateid;
stp->st_access_bmap = 0;
stp->st_deny_bmap = open_stp->st_deny_bmap;
stp->st_openstp = open_stp;
- return stp;
+ list_add(&stp->st_locks, &open_stp->st_locks);
+ list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids);
+ spin_lock(&fp->fi_lock);
+ list_add(&stp->st_perfile, &fp->fi_stateids);
+ spin_unlock(&fp->fi_lock);
+}
+
+static struct nfs4_ol_stateid *
+find_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp)
+{
+ struct nfs4_ol_stateid *lst;
+ struct nfs4_client *clp = lo->lo_owner.so_client;
+
+ lockdep_assert_held(&clp->cl_lock);
+
+ list_for_each_entry(lst, &lo->lo_owner.so_stateids, st_perstateowner) {
+ if (lst->st_stid.sc_file == fp) {
+ atomic_inc(&lst->st_stid.sc_count);
+ return lst;
+ }
+ }
+ return NULL;
+}
+
+static struct nfs4_ol_stateid *
+find_or_create_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fi,
+ struct inode *inode, struct nfs4_ol_stateid *ost,
+ bool *new)
+{
+ struct nfs4_stid *ns = NULL;
+ struct nfs4_ol_stateid *lst;
+ struct nfs4_openowner *oo = openowner(ost->st_stateowner);
+ struct nfs4_client *clp = oo->oo_owner.so_client;
+
+ spin_lock(&clp->cl_lock);
+ lst = find_lock_stateid(lo, fi);
+ if (lst == NULL) {
+ spin_unlock(&clp->cl_lock);
+ ns = nfs4_alloc_stid(clp, stateid_slab);
+ if (ns == NULL)
+ return NULL;
+
+ spin_lock(&clp->cl_lock);
+ lst = find_lock_stateid(lo, fi);
+ if (likely(!lst)) {
+ lst = openlockstateid(ns);
+ init_lock_stateid(lst, lo, fi, inode, ost);
+ ns = NULL;
+ *new = true;
+ }
+ }
+ spin_unlock(&clp->cl_lock);
+ if (ns)
+ nfs4_put_stid(ns);
+ return lst;
}
static int
@@ -4350,46 +5071,53 @@ check_lock_length(u64 offset, u64 length)
static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access)
{
- struct nfs4_file *fp = lock_stp->st_file;
- int oflag = nfs4_access_to_omode(access);
+ struct nfs4_file *fp = lock_stp->st_stid.sc_file;
+
+ lockdep_assert_held(&fp->fi_lock);
if (test_access(access, lock_stp))
return;
- nfs4_file_get_access(fp, oflag);
+ __nfs4_file_get_access(fp, access);
set_access(access, lock_stp);
}
-static __be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *ost, struct nfsd4_lock *lock, struct nfs4_ol_stateid **lst, bool *new)
+static __be32
+lookup_or_create_lock_state(struct nfsd4_compound_state *cstate,
+ struct nfs4_ol_stateid *ost,
+ struct nfsd4_lock *lock,
+ struct nfs4_ol_stateid **lst, bool *new)
{
- struct nfs4_file *fi = ost->st_file;
+ __be32 status;
+ struct nfs4_file *fi = ost->st_stid.sc_file;
struct nfs4_openowner *oo = openowner(ost->st_stateowner);
struct nfs4_client *cl = oo->oo_owner.so_client;
+ struct inode *inode = cstate->current_fh.fh_dentry->d_inode;
struct nfs4_lockowner *lo;
unsigned int strhashval;
- struct nfsd_net *nn = net_generic(cl->net, nfsd_net_id);
-
- lo = find_lockowner_str(fi->fi_inode, &cl->cl_clientid,
- &lock->v.new.owner, nn);
- if (lo) {
- if (!cstate->minorversion)
- return nfserr_bad_seqid;
- /* XXX: a lockowner always has exactly one stateid: */
- *lst = list_first_entry(&lo->lo_owner.so_stateids,
- struct nfs4_ol_stateid, st_perstateowner);
- return nfs_ok;
+
+ lo = find_lockowner_str(&cl->cl_clientid, &lock->v.new.owner, cl);
+ if (!lo) {
+ strhashval = ownerstr_hashval(&lock->v.new.owner);
+ lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock);
+ if (lo == NULL)
+ return nfserr_jukebox;
+ } else {
+ /* with an existing lockowner, seqids must be the same */
+ status = nfserr_bad_seqid;
+ if (!cstate->minorversion &&
+ lock->lk_new_lock_seqid != lo->lo_owner.so_seqid)
+ goto out;
}
- strhashval = ownerstr_hashval(cl->cl_clientid.cl_id,
- &lock->v.new.owner);
- lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock);
- if (lo == NULL)
- return nfserr_jukebox;
- *lst = alloc_init_lock_stateid(lo, fi, ost);
+
+ *lst = find_or_create_lock_stateid(lo, fi, inode, ost, new);
if (*lst == NULL) {
- release_lockowner(lo);
- return nfserr_jukebox;
+ status = nfserr_jukebox;
+ goto out;
}
- *new = true;
- return nfs_ok;
+ status = nfs_ok;
+out:
+ nfs4_put_stateowner(&lo->lo_owner);
+ return status;
}
/*
@@ -4401,14 +5129,16 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
{
struct nfs4_openowner *open_sop = NULL;
struct nfs4_lockowner *lock_sop = NULL;
- struct nfs4_ol_stateid *lock_stp;
+ struct nfs4_ol_stateid *lock_stp = NULL;
+ struct nfs4_ol_stateid *open_stp = NULL;
+ struct nfs4_file *fp;
struct file *filp = NULL;
struct file_lock *file_lock = NULL;
struct file_lock *conflock = NULL;
__be32 status = 0;
- bool new_state = false;
int lkflg;
int err;
+ bool new = false;
struct net *net = SVC_NET(rqstp);
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
@@ -4425,11 +5155,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return status;
}
- nfs4_lock_state();
-
if (lock->lk_is_new) {
- struct nfs4_ol_stateid *open_stp = NULL;
-
if (nfsd4_has_session(cstate))
/* See rfc 5661 18.10.3: given clientid is ignored: */
memcpy(&lock->v.new.clientid,
@@ -4453,12 +5179,13 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
&lock->v.new.clientid))
goto out;
status = lookup_or_create_lock_state(cstate, open_stp, lock,
- &lock_stp, &new_state);
- } else
+ &lock_stp, &new);
+ } else {
status = nfs4_preprocess_seqid_op(cstate,
lock->lk_old_lock_seqid,
&lock->lk_old_lock_stateid,
NFS4_LOCK_STID, &lock_stp, nn);
+ }
if (status)
goto out;
lock_sop = lockowner(lock_stp->st_stateowner);
@@ -4482,20 +5209,25 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
}
+ fp = lock_stp->st_stid.sc_file;
locks_init_lock(file_lock);
switch (lock->lk_type) {
case NFS4_READ_LT:
case NFS4_READW_LT:
- filp = find_readable_file(lock_stp->st_file);
+ spin_lock(&fp->fi_lock);
+ filp = find_readable_file_locked(fp);
if (filp)
get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ);
+ spin_unlock(&fp->fi_lock);
file_lock->fl_type = F_RDLCK;
break;
case NFS4_WRITE_LT:
case NFS4_WRITEW_LT:
- filp = find_writeable_file(lock_stp->st_file);
+ spin_lock(&fp->fi_lock);
+ filp = find_writeable_file_locked(fp);
if (filp)
get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE);
+ spin_unlock(&fp->fi_lock);
file_lock->fl_type = F_WRLCK;
break;
default:
@@ -4544,11 +5276,27 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
break;
}
out:
- if (status && new_state)
- release_lockowner(lock_sop);
+ if (filp)
+ fput(filp);
+ if (lock_stp) {
+ /* Bump seqid manually if the 4.0 replay owner is openowner */
+ if (cstate->replay_owner &&
+ cstate->replay_owner != &lock_sop->lo_owner &&
+ seqid_mutating_err(ntohl(status)))
+ lock_sop->lo_owner.so_seqid++;
+
+ /*
+ * If this is a new, never-before-used stateid, and we are
+ * returning an error, then just go ahead and release it.
+ */
+ if (status && new)
+ release_lock_stateid(lock_stp);
+
+ nfs4_put_stid(&lock_stp->st_stid);
+ }
+ if (open_stp)
+ nfs4_put_stid(&open_stp->st_stid);
nfsd4_bump_seqid(cstate, status);
- if (!cstate->replay_owner)
- nfs4_unlock_state();
if (file_lock)
locks_free_lock(file_lock);
if (conflock)
@@ -4580,9 +5328,8 @@ __be32
nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_lockt *lockt)
{
- struct inode *inode;
struct file_lock *file_lock = NULL;
- struct nfs4_lockowner *lo;
+ struct nfs4_lockowner *lo = NULL;
__be32 status;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
@@ -4592,10 +5339,8 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (check_lock_length(lockt->lt_offset, lockt->lt_length))
return nfserr_inval;
- nfs4_lock_state();
-
if (!nfsd4_has_session(cstate)) {
- status = lookup_clientid(&lockt->lt_clientid, false, nn, NULL);
+ status = lookup_clientid(&lockt->lt_clientid, cstate, nn);
if (status)
goto out;
}
@@ -4603,7 +5348,6 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0)))
goto out;
- inode = cstate->current_fh.fh_dentry->d_inode;
file_lock = locks_alloc_lock();
if (!file_lock) {
dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
@@ -4626,7 +5370,8 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
}
- lo = find_lockowner_str(inode, &lockt->lt_clientid, &lockt->lt_owner, nn);
+ lo = find_lockowner_str(&lockt->lt_clientid, &lockt->lt_owner,
+ cstate->clp);
if (lo)
file_lock->fl_owner = (fl_owner_t)lo;
file_lock->fl_pid = current->tgid;
@@ -4646,7 +5391,8 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
nfs4_set_lock_denied(file_lock, &lockt->lt_denied);
}
out:
- nfs4_unlock_state();
+ if (lo)
+ nfs4_put_stateowner(&lo->lo_owner);
if (file_lock)
locks_free_lock(file_lock);
return status;
@@ -4670,23 +5416,21 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (check_lock_length(locku->lu_offset, locku->lu_length))
return nfserr_inval;
- nfs4_lock_state();
-
status = nfs4_preprocess_seqid_op(cstate, locku->lu_seqid,
&locku->lu_stateid, NFS4_LOCK_STID,
&stp, nn);
if (status)
goto out;
- filp = find_any_file(stp->st_file);
+ filp = find_any_file(stp->st_stid.sc_file);
if (!filp) {
status = nfserr_lock_range;
- goto out;
+ goto put_stateid;
}
file_lock = locks_alloc_lock();
if (!file_lock) {
dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
status = nfserr_jukebox;
- goto out;
+ goto fput;
}
locks_init_lock(file_lock);
file_lock->fl_type = F_UNLCK;
@@ -4708,41 +5452,51 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
}
update_stateid(&stp->st_stid.sc_stateid);
memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
-
+fput:
+ fput(filp);
+put_stateid:
+ nfs4_put_stid(&stp->st_stid);
out:
nfsd4_bump_seqid(cstate, status);
- if (!cstate->replay_owner)
- nfs4_unlock_state();
if (file_lock)
locks_free_lock(file_lock);
return status;
out_nfserr:
status = nfserrno(err);
- goto out;
+ goto fput;
}
/*
* returns
- * 1: locks held by lockowner
- * 0: no locks held by lockowner
+ * true: locks held by lockowner
+ * false: no locks held by lockowner
*/
-static int
-check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner)
+static bool
+check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
{
struct file_lock **flpp;
- struct inode *inode = filp->fi_inode;
- int status = 0;
+ int status = false;
+ struct file *filp = find_any_file(fp);
+ struct inode *inode;
+
+ if (!filp) {
+ /* Any valid lock stateid should have some sort of access */
+ WARN_ON_ONCE(1);
+ return status;
+ }
+
+ inode = file_inode(filp);
spin_lock(&inode->i_lock);
for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) {
if ((*flpp)->fl_owner == (fl_owner_t)lowner) {
- status = 1;
- goto out;
+ status = true;
+ break;
}
}
-out:
spin_unlock(&inode->i_lock);
+ fput(filp);
return status;
}
@@ -4753,53 +5507,46 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
{
clientid_t *clid = &rlockowner->rl_clientid;
struct nfs4_stateowner *sop;
- struct nfs4_lockowner *lo;
+ struct nfs4_lockowner *lo = NULL;
struct nfs4_ol_stateid *stp;
struct xdr_netobj *owner = &rlockowner->rl_owner;
- struct list_head matches;
- unsigned int hashval = ownerstr_hashval(clid->cl_id, owner);
+ unsigned int hashval = ownerstr_hashval(owner);
__be32 status;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ struct nfs4_client *clp;
dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
clid->cl_boot, clid->cl_id);
- nfs4_lock_state();
-
- status = lookup_clientid(clid, cstate->minorversion, nn, NULL);
+ status = lookup_clientid(clid, cstate, nn);
if (status)
- goto out;
+ return status;
- status = nfserr_locks_held;
- INIT_LIST_HEAD(&matches);
+ clp = cstate->clp;
+ /* Find the matching lock stateowner */
+ spin_lock(&clp->cl_lock);
+ list_for_each_entry(sop, &clp->cl_ownerstr_hashtbl[hashval],
+ so_strhash) {
- list_for_each_entry(sop, &nn->ownerstr_hashtbl[hashval], so_strhash) {
- if (sop->so_is_open_owner)
+ if (sop->so_is_open_owner || !same_owner_str(sop, owner))
continue;
- if (!same_owner_str(sop, owner, clid))
- continue;
- list_for_each_entry(stp, &sop->so_stateids,
- st_perstateowner) {
- lo = lockowner(sop);
- if (check_for_locks(stp->st_file, lo))
- goto out;
- list_add(&lo->lo_list, &matches);
+
+ /* see if there are still any locks associated with it */
+ lo = lockowner(sop);
+ list_for_each_entry(stp, &sop->so_stateids, st_perstateowner) {
+ if (check_for_locks(stp->st_stid.sc_file, lo)) {
+ status = nfserr_locks_held;
+ spin_unlock(&clp->cl_lock);
+ return status;
+ }
}
+
+ atomic_inc(&sop->so_count);
+ break;
}
- /* Clients probably won't expect us to return with some (but not all)
- * of the lockowner state released; so don't release any until all
- * have been checked. */
- status = nfs_ok;
- while (!list_empty(&matches)) {
- lo = list_entry(matches.next, struct nfs4_lockowner,
- lo_list);
- /* unhash_stateowner deletes so_perclient only
- * for openowners. */
- list_del(&lo->lo_list);
+ spin_unlock(&clp->cl_lock);
+ if (lo)
release_lockowner(lo);
- }
-out:
- nfs4_unlock_state();
return status;
}
@@ -4887,34 +5634,123 @@ nfsd4_find_reclaim_client(const char *recdir, struct nfsd_net *nn)
* Called from OPEN. Look for clientid in reclaim list.
*/
__be32
-nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn)
+nfs4_check_open_reclaim(clientid_t *clid,
+ struct nfsd4_compound_state *cstate,
+ struct nfsd_net *nn)
{
- struct nfs4_client *clp;
+ __be32 status;
/* find clientid in conf_id_hashtbl */
- clp = find_confirmed_client(clid, sessions, nn);
- if (clp == NULL)
+ status = lookup_clientid(clid, cstate, nn);
+ if (status)
return nfserr_reclaim_bad;
- return nfsd4_client_record_check(clp) ? nfserr_reclaim_bad : nfs_ok;
+ if (nfsd4_client_record_check(cstate->clp))
+ return nfserr_reclaim_bad;
+
+ return nfs_ok;
}
#ifdef CONFIG_NFSD_FAULT_INJECTION
+static inline void
+put_client(struct nfs4_client *clp)
+{
+ atomic_dec(&clp->cl_refcount);
+}
-u64 nfsd_forget_client(struct nfs4_client *clp, u64 max)
+static struct nfs4_client *
+nfsd_find_client(struct sockaddr_storage *addr, size_t addr_size)
{
- if (mark_client_expired(clp))
- return 0;
- expire_client(clp);
- return 1;
+ struct nfs4_client *clp;
+ struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
+ nfsd_net_id);
+
+ if (!nfsd_netns_ready(nn))
+ return NULL;
+
+ list_for_each_entry(clp, &nn->client_lru, cl_lru) {
+ if (memcmp(&clp->cl_addr, addr, addr_size) == 0)
+ return clp;
+ }
+ return NULL;
}
-u64 nfsd_print_client(struct nfs4_client *clp, u64 num)
+u64
+nfsd_inject_print_clients(void)
{
+ struct nfs4_client *clp;
+ u64 count = 0;
+ struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
+ nfsd_net_id);
char buf[INET6_ADDRSTRLEN];
- rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf));
- printk(KERN_INFO "NFS Client: %s\n", buf);
- return 1;
+
+ if (!nfsd_netns_ready(nn))
+ return 0;
+
+ spin_lock(&nn->client_lock);
+ list_for_each_entry(clp, &nn->client_lru, cl_lru) {
+ rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf));
+ pr_info("NFS Client: %s\n", buf);
+ ++count;
+ }
+ spin_unlock(&nn->client_lock);
+
+ return count;
+}
+
+u64
+nfsd_inject_forget_client(struct sockaddr_storage *addr, size_t addr_size)
+{
+ u64 count = 0;
+ struct nfs4_client *clp;
+ struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
+ nfsd_net_id);
+
+ if (!nfsd_netns_ready(nn))
+ return count;
+
+ spin_lock(&nn->client_lock);
+ clp = nfsd_find_client(addr, addr_size);
+ if (clp) {
+ if (mark_client_expired_locked(clp) == nfs_ok)
+ ++count;
+ else
+ clp = NULL;
+ }
+ spin_unlock(&nn->client_lock);
+
+ if (clp)
+ expire_client(clp);
+
+ return count;
+}
+
+u64
+nfsd_inject_forget_clients(u64 max)
+{
+ u64 count = 0;
+ struct nfs4_client *clp, *next;
+ struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
+ nfsd_net_id);
+ LIST_HEAD(reaplist);
+
+ if (!nfsd_netns_ready(nn))
+ return count;
+
+ spin_lock(&nn->client_lock);
+ list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) {
+ if (mark_client_expired_locked(clp) == nfs_ok) {
+ list_add(&clp->cl_lru, &reaplist);
+ if (max != 0 && ++count >= max)
+ break;
+ }
+ }
+ spin_unlock(&nn->client_lock);
+
+ list_for_each_entry_safe(clp, next, &reaplist, cl_lru)
+ expire_client(clp);
+
+ return count;
}
static void nfsd_print_count(struct nfs4_client *clp, unsigned int count,
@@ -4925,158 +5761,484 @@ static void nfsd_print_count(struct nfs4_client *clp, unsigned int count,
printk(KERN_INFO "NFS Client: %s has %u %s\n", buf, count, type);
}
-static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max, void (*func)(struct nfs4_lockowner *))
+static void
+nfsd_inject_add_lock_to_list(struct nfs4_ol_stateid *lst,
+ struct list_head *collect)
+{
+ struct nfs4_client *clp = lst->st_stid.sc_client;
+ struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
+ nfsd_net_id);
+
+ if (!collect)
+ return;
+
+ lockdep_assert_held(&nn->client_lock);
+ atomic_inc(&clp->cl_refcount);
+ list_add(&lst->st_locks, collect);
+}
+
+static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max,
+ struct list_head *collect,
+ void (*func)(struct nfs4_ol_stateid *))
{
struct nfs4_openowner *oop;
- struct nfs4_lockowner *lop, *lo_next;
struct nfs4_ol_stateid *stp, *st_next;
+ struct nfs4_ol_stateid *lst, *lst_next;
u64 count = 0;
+ spin_lock(&clp->cl_lock);
list_for_each_entry(oop, &clp->cl_openowners, oo_perclient) {
- list_for_each_entry_safe(stp, st_next, &oop->oo_owner.so_stateids, st_perstateowner) {
- list_for_each_entry_safe(lop, lo_next, &stp->st_lockowners, lo_perstateid) {
- if (func)
- func(lop);
- if (++count == max)
- return count;
+ list_for_each_entry_safe(stp, st_next,
+ &oop->oo_owner.so_stateids, st_perstateowner) {
+ list_for_each_entry_safe(lst, lst_next,
+ &stp->st_locks, st_locks) {
+ if (func) {
+ func(lst);
+ nfsd_inject_add_lock_to_list(lst,
+ collect);
+ }
+ ++count;
+ /*
+ * Despite the fact that these functions deal
+ * with 64-bit integers for "count", we must
+ * ensure that it doesn't blow up the
+ * clp->cl_refcount. Throw a warning if we
+ * start to approach INT_MAX here.
+ */
+ WARN_ON_ONCE(count == (INT_MAX / 2));
+ if (count == max)
+ goto out;
}
}
}
+out:
+ spin_unlock(&clp->cl_lock);
return count;
}
-u64 nfsd_forget_client_locks(struct nfs4_client *clp, u64 max)
+static u64
+nfsd_collect_client_locks(struct nfs4_client *clp, struct list_head *collect,
+ u64 max)
{
- return nfsd_foreach_client_lock(clp, max, release_lockowner);
+ return nfsd_foreach_client_lock(clp, max, collect, unhash_lock_stateid);
}
-u64 nfsd_print_client_locks(struct nfs4_client *clp, u64 max)
+static u64
+nfsd_print_client_locks(struct nfs4_client *clp)
{
- u64 count = nfsd_foreach_client_lock(clp, max, NULL);
+ u64 count = nfsd_foreach_client_lock(clp, 0, NULL, NULL);
nfsd_print_count(clp, count, "locked files");
return count;
}
-static u64 nfsd_foreach_client_open(struct nfs4_client *clp, u64 max, void (*func)(struct nfs4_openowner *))
+u64
+nfsd_inject_print_locks(void)
+{
+ struct nfs4_client *clp;
+ u64 count = 0;
+ struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
+ nfsd_net_id);
+
+ if (!nfsd_netns_ready(nn))
+ return 0;
+
+ spin_lock(&nn->client_lock);
+ list_for_each_entry(clp, &nn->client_lru, cl_lru)
+ count += nfsd_print_client_locks(clp);
+ spin_unlock(&nn->client_lock);
+
+ return count;
+}
+
+static void
+nfsd_reap_locks(struct list_head *reaplist)
+{
+ struct nfs4_client *clp;
+ struct nfs4_ol_stateid *stp, *next;
+
+ list_for_each_entry_safe(stp, next, reaplist, st_locks) {
+ list_del_init(&stp->st_locks);
+ clp = stp->st_stid.sc_client;
+ nfs4_put_stid(&stp->st_stid);
+ put_client(clp);
+ }
+}
+
+u64
+nfsd_inject_forget_client_locks(struct sockaddr_storage *addr, size_t addr_size)
+{
+ unsigned int count = 0;
+ struct nfs4_client *clp;
+ struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
+ nfsd_net_id);
+ LIST_HEAD(reaplist);
+
+ if (!nfsd_netns_ready(nn))
+ return count;
+
+ spin_lock(&nn->client_lock);
+ clp = nfsd_find_client(addr, addr_size);
+ if (clp)
+ count = nfsd_collect_client_locks(clp, &reaplist, 0);
+ spin_unlock(&nn->client_lock);
+ nfsd_reap_locks(&reaplist);
+ return count;
+}
+
+u64
+nfsd_inject_forget_locks(u64 max)
+{
+ u64 count = 0;
+ struct nfs4_client *clp;
+ struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
+ nfsd_net_id);
+ LIST_HEAD(reaplist);
+
+ if (!nfsd_netns_ready(nn))
+ return count;
+
+ spin_lock(&nn->client_lock);
+ list_for_each_entry(clp, &nn->client_lru, cl_lru) {
+ count += nfsd_collect_client_locks(clp, &reaplist, max - count);
+ if (max != 0 && count >= max)
+ break;
+ }
+ spin_unlock(&nn->client_lock);
+ nfsd_reap_locks(&reaplist);
+ return count;
+}
+
+static u64
+nfsd_foreach_client_openowner(struct nfs4_client *clp, u64 max,
+ struct list_head *collect,
+ void (*func)(struct nfs4_openowner *))
{
struct nfs4_openowner *oop, *next;
+ struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
+ nfsd_net_id);
u64 count = 0;
+ lockdep_assert_held(&nn->client_lock);
+
+ spin_lock(&clp->cl_lock);
list_for_each_entry_safe(oop, next, &clp->cl_openowners, oo_perclient) {
- if (func)
+ if (func) {
func(oop);
- if (++count == max)
+ if (collect) {
+ atomic_inc(&clp->cl_refcount);
+ list_add(&oop->oo_perclient, collect);
+ }
+ }
+ ++count;
+ /*
+ * Despite the fact that these functions deal with
+ * 64-bit integers for "count", we must ensure that
+ * it doesn't blow up the clp->cl_refcount. Throw a
+ * warning if we start to approach INT_MAX here.
+ */
+ WARN_ON_ONCE(count == (INT_MAX / 2));
+ if (count == max)
break;
}
+ spin_unlock(&clp->cl_lock);
return count;
}
-u64 nfsd_forget_client_openowners(struct nfs4_client *clp, u64 max)
+static u64
+nfsd_print_client_openowners(struct nfs4_client *clp)
{
- return nfsd_foreach_client_open(clp, max, release_openowner);
+ u64 count = nfsd_foreach_client_openowner(clp, 0, NULL, NULL);
+
+ nfsd_print_count(clp, count, "openowners");
+ return count;
}
-u64 nfsd_print_client_openowners(struct nfs4_client *clp, u64 max)
+static u64
+nfsd_collect_client_openowners(struct nfs4_client *clp,
+ struct list_head *collect, u64 max)
{
- u64 count = nfsd_foreach_client_open(clp, max, NULL);
- nfsd_print_count(clp, count, "open files");
- return count;
+ return nfsd_foreach_client_openowner(clp, max, collect,
+ unhash_openowner_locked);
}
-static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max,
- struct list_head *victims)
+u64
+nfsd_inject_print_openowners(void)
{
- struct nfs4_delegation *dp, *next;
+ struct nfs4_client *clp;
u64 count = 0;
+ struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
+ nfsd_net_id);
+
+ if (!nfsd_netns_ready(nn))
+ return 0;
+
+ spin_lock(&nn->client_lock);
+ list_for_each_entry(clp, &nn->client_lru, cl_lru)
+ count += nfsd_print_client_openowners(clp);
+ spin_unlock(&nn->client_lock);
- lockdep_assert_held(&state_lock);
- list_for_each_entry_safe(dp, next, &clp->cl_delegations, dl_perclnt) {
- if (victims)
- list_move(&dp->dl_recall_lru, victims);
- if (++count == max)
- break;
- }
return count;
}
-u64 nfsd_forget_client_delegations(struct nfs4_client *clp, u64 max)
+static void
+nfsd_reap_openowners(struct list_head *reaplist)
{
- struct nfs4_delegation *dp, *next;
- LIST_HEAD(victims);
- u64 count;
+ struct nfs4_client *clp;
+ struct nfs4_openowner *oop, *next;
- spin_lock(&state_lock);
- count = nfsd_find_all_delegations(clp, max, &victims);
- spin_unlock(&state_lock);
+ list_for_each_entry_safe(oop, next, reaplist, oo_perclient) {
+ list_del_init(&oop->oo_perclient);
+ clp = oop->oo_owner.so_client;
+ release_openowner(oop);
+ put_client(clp);
+ }
+}
- list_for_each_entry_safe(dp, next, &victims, dl_recall_lru)
- revoke_delegation(dp);
+u64
+nfsd_inject_forget_client_openowners(struct sockaddr_storage *addr,
+ size_t addr_size)
+{
+ unsigned int count = 0;
+ struct nfs4_client *clp;
+ struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
+ nfsd_net_id);
+ LIST_HEAD(reaplist);
+ if (!nfsd_netns_ready(nn))
+ return count;
+
+ spin_lock(&nn->client_lock);
+ clp = nfsd_find_client(addr, addr_size);
+ if (clp)
+ count = nfsd_collect_client_openowners(clp, &reaplist, 0);
+ spin_unlock(&nn->client_lock);
+ nfsd_reap_openowners(&reaplist);
return count;
}
-u64 nfsd_recall_client_delegations(struct nfs4_client *clp, u64 max)
+u64
+nfsd_inject_forget_openowners(u64 max)
{
- struct nfs4_delegation *dp, *next;
- LIST_HEAD(victims);
- u64 count;
+ u64 count = 0;
+ struct nfs4_client *clp;
+ struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
+ nfsd_net_id);
+ LIST_HEAD(reaplist);
- spin_lock(&state_lock);
- count = nfsd_find_all_delegations(clp, max, &victims);
- list_for_each_entry_safe(dp, next, &victims, dl_recall_lru)
- nfsd_break_one_deleg(dp);
- spin_unlock(&state_lock);
+ if (!nfsd_netns_ready(nn))
+ return count;
+ spin_lock(&nn->client_lock);
+ list_for_each_entry(clp, &nn->client_lru, cl_lru) {
+ count += nfsd_collect_client_openowners(clp, &reaplist,
+ max - count);
+ if (max != 0 && count >= max)
+ break;
+ }
+ spin_unlock(&nn->client_lock);
+ nfsd_reap_openowners(&reaplist);
return count;
}
-u64 nfsd_print_client_delegations(struct nfs4_client *clp, u64 max)
+static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max,
+ struct list_head *victims)
{
+ struct nfs4_delegation *dp, *next;
+ struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
+ nfsd_net_id);
u64 count = 0;
+ lockdep_assert_held(&nn->client_lock);
+
spin_lock(&state_lock);
- count = nfsd_find_all_delegations(clp, max, NULL);
+ list_for_each_entry_safe(dp, next, &clp->cl_delegations, dl_perclnt) {
+ if (victims) {
+ /*
+ * It's not safe to mess with delegations that have a
+ * non-zero dl_time. They might have already been broken
+ * and could be processed by the laundromat outside of
+ * the state_lock. Just leave them be.
+ */
+ if (dp->dl_time != 0)
+ continue;
+
+ atomic_inc(&clp->cl_refcount);
+ unhash_delegation_locked(dp);
+ list_add(&dp->dl_recall_lru, victims);
+ }
+ ++count;
+ /*
+ * Despite the fact that these functions deal with
+ * 64-bit integers for "count", we must ensure that
+ * it doesn't blow up the clp->cl_refcount. Throw a
+ * warning if we start to approach INT_MAX here.
+ */
+ WARN_ON_ONCE(count == (INT_MAX / 2));
+ if (count == max)
+ break;
+ }
spin_unlock(&state_lock);
+ return count;
+}
+
+static u64
+nfsd_print_client_delegations(struct nfs4_client *clp)
+{
+ u64 count = nfsd_find_all_delegations(clp, 0, NULL);
nfsd_print_count(clp, count, "delegations");
return count;
}
-u64 nfsd_for_n_state(u64 max, u64 (*func)(struct nfs4_client *, u64))
+u64
+nfsd_inject_print_delegations(void)
{
- struct nfs4_client *clp, *next;
+ struct nfs4_client *clp;
u64 count = 0;
- struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, nfsd_net_id);
+ struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
+ nfsd_net_id);
if (!nfsd_netns_ready(nn))
return 0;
- list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) {
- count += func(clp, max - count);
- if ((max != 0) && (count >= max))
- break;
+ spin_lock(&nn->client_lock);
+ list_for_each_entry(clp, &nn->client_lru, cl_lru)
+ count += nfsd_print_client_delegations(clp);
+ spin_unlock(&nn->client_lock);
+
+ return count;
+}
+
+static void
+nfsd_forget_delegations(struct list_head *reaplist)
+{
+ struct nfs4_client *clp;
+ struct nfs4_delegation *dp, *next;
+
+ list_for_each_entry_safe(dp, next, reaplist, dl_recall_lru) {
+ list_del_init(&dp->dl_recall_lru);
+ clp = dp->dl_stid.sc_client;
+ revoke_delegation(dp);
+ put_client(clp);
}
+}
+u64
+nfsd_inject_forget_client_delegations(struct sockaddr_storage *addr,
+ size_t addr_size)
+{
+ u64 count = 0;
+ struct nfs4_client *clp;
+ struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
+ nfsd_net_id);
+ LIST_HEAD(reaplist);
+
+ if (!nfsd_netns_ready(nn))
+ return count;
+
+ spin_lock(&nn->client_lock);
+ clp = nfsd_find_client(addr, addr_size);
+ if (clp)
+ count = nfsd_find_all_delegations(clp, 0, &reaplist);
+ spin_unlock(&nn->client_lock);
+
+ nfsd_forget_delegations(&reaplist);
return count;
}
-struct nfs4_client *nfsd_find_client(struct sockaddr_storage *addr, size_t addr_size)
+u64
+nfsd_inject_forget_delegations(u64 max)
{
+ u64 count = 0;
struct nfs4_client *clp;
- struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, nfsd_net_id);
+ struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
+ nfsd_net_id);
+ LIST_HEAD(reaplist);
if (!nfsd_netns_ready(nn))
- return NULL;
+ return count;
+ spin_lock(&nn->client_lock);
list_for_each_entry(clp, &nn->client_lru, cl_lru) {
- if (memcmp(&clp->cl_addr, addr, addr_size) == 0)
- return clp;
+ count += nfsd_find_all_delegations(clp, max - count, &reaplist);
+ if (max != 0 && count >= max)
+ break;
}
- return NULL;
+ spin_unlock(&nn->client_lock);
+ nfsd_forget_delegations(&reaplist);
+ return count;
}
+static void
+nfsd_recall_delegations(struct list_head *reaplist)
+{
+ struct nfs4_client *clp;
+ struct nfs4_delegation *dp, *next;
+
+ list_for_each_entry_safe(dp, next, reaplist, dl_recall_lru) {
+ list_del_init(&dp->dl_recall_lru);
+ clp = dp->dl_stid.sc_client;
+ /*
+ * We skipped all entries that had a zero dl_time before,
+ * so we can now reset the dl_time back to 0. If a delegation
+ * break comes in now, then it won't make any difference since
+ * we're recalling it either way.
+ */
+ spin_lock(&state_lock);
+ dp->dl_time = 0;
+ spin_unlock(&state_lock);
+ nfsd_break_one_deleg(dp);
+ put_client(clp);
+ }
+}
+
+u64
+nfsd_inject_recall_client_delegations(struct sockaddr_storage *addr,
+ size_t addr_size)
+{
+ u64 count = 0;
+ struct nfs4_client *clp;
+ struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
+ nfsd_net_id);
+ LIST_HEAD(reaplist);
+
+ if (!nfsd_netns_ready(nn))
+ return count;
+
+ spin_lock(&nn->client_lock);
+ clp = nfsd_find_client(addr, addr_size);
+ if (clp)
+ count = nfsd_find_all_delegations(clp, 0, &reaplist);
+ spin_unlock(&nn->client_lock);
+
+ nfsd_recall_delegations(&reaplist);
+ return count;
+}
+
+u64
+nfsd_inject_recall_delegations(u64 max)
+{
+ u64 count = 0;
+ struct nfs4_client *clp, *next;
+ struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
+ nfsd_net_id);
+ LIST_HEAD(reaplist);
+
+ if (!nfsd_netns_ready(nn))
+ return count;
+
+ spin_lock(&nn->client_lock);
+ list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) {
+ count += nfsd_find_all_delegations(clp, max - count, &reaplist);
+ if (max != 0 && ++count >= max)
+ break;
+ }
+ spin_unlock(&nn->client_lock);
+ nfsd_recall_delegations(&reaplist);
+ return count;
+}
#endif /* CONFIG_NFSD_FAULT_INJECTION */
/*
@@ -5113,14 +6275,6 @@ static int nfs4_state_create_net(struct net *net)
CLIENT_HASH_SIZE, GFP_KERNEL);
if (!nn->unconf_id_hashtbl)
goto err_unconf_id;
- nn->ownerstr_hashtbl = kmalloc(sizeof(struct list_head) *
- OWNER_HASH_SIZE, GFP_KERNEL);
- if (!nn->ownerstr_hashtbl)
- goto err_ownerstr;
- nn->lockowner_ino_hashtbl = kmalloc(sizeof(struct list_head) *
- LOCKOWNER_INO_HASH_SIZE, GFP_KERNEL);
- if (!nn->lockowner_ino_hashtbl)
- goto err_lockowner_ino;
nn->sessionid_hashtbl = kmalloc(sizeof(struct list_head) *
SESSION_HASH_SIZE, GFP_KERNEL);
if (!nn->sessionid_hashtbl)
@@ -5130,10 +6284,6 @@ static int nfs4_state_create_net(struct net *net)
INIT_LIST_HEAD(&nn->conf_id_hashtbl[i]);
INIT_LIST_HEAD(&nn->unconf_id_hashtbl[i]);
}
- for (i = 0; i < OWNER_HASH_SIZE; i++)
- INIT_LIST_HEAD(&nn->ownerstr_hashtbl[i]);
- for (i = 0; i < LOCKOWNER_INO_HASH_SIZE; i++)
- INIT_LIST_HEAD(&nn->lockowner_ino_hashtbl[i]);
for (i = 0; i < SESSION_HASH_SIZE; i++)
INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]);
nn->conf_name_tree = RB_ROOT;
@@ -5149,10 +6299,6 @@ static int nfs4_state_create_net(struct net *net)
return 0;
err_sessionid:
- kfree(nn->lockowner_ino_hashtbl);
-err_lockowner_ino:
- kfree(nn->ownerstr_hashtbl);
-err_ownerstr:
kfree(nn->unconf_id_hashtbl);
err_unconf_id:
kfree(nn->conf_id_hashtbl);
@@ -5182,8 +6328,6 @@ nfs4_state_destroy_net(struct net *net)
}
kfree(nn->sessionid_hashtbl);
- kfree(nn->lockowner_ino_hashtbl);
- kfree(nn->ownerstr_hashtbl);
kfree(nn->unconf_id_hashtbl);
kfree(nn->conf_id_hashtbl);
put_net(net);
@@ -5247,22 +6391,22 @@ nfs4_state_shutdown_net(struct net *net)
cancel_delayed_work_sync(&nn->laundromat_work);
locks_end_grace(&nn->nfsd4_manager);
- nfs4_lock_state();
INIT_LIST_HEAD(&reaplist);
spin_lock(&state_lock);
list_for_each_safe(pos, next, &nn->del_recall_lru) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
- list_move(&dp->dl_recall_lru, &reaplist);
+ unhash_delegation_locked(dp);
+ list_add(&dp->dl_recall_lru, &reaplist);
}
spin_unlock(&state_lock);
list_for_each_safe(pos, next, &reaplist) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
- destroy_delegation(dp);
+ list_del_init(&dp->dl_recall_lru);
+ nfs4_put_stid(&dp->dl_stid);
}
nfsd4_client_tracking_exit(net);
nfs4_state_destroy_net(net);
- nfs4_unlock_state();
}
void
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 944275c8f56d..b01f6e100ee8 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -181,28 +181,43 @@ static int zero_clientid(clientid_t *clid)
}
/**
- * defer_free - mark an allocation as deferred freed
- * @argp: NFSv4 compound argument structure to be freed with
- * @release: release callback to free @p, typically kfree()
- * @p: pointer to be freed
+ * svcxdr_tmpalloc - allocate memory to be freed after compound processing
+ * @argp: NFSv4 compound argument structure
+ * @p: pointer to be freed (with kfree())
*
* Marks @p to be freed when processing the compound operation
* described in @argp finishes.
*/
-static int
-defer_free(struct nfsd4_compoundargs *argp,
- void (*release)(const void *), void *p)
+static void *
+svcxdr_tmpalloc(struct nfsd4_compoundargs *argp, u32 len)
{
- struct tmpbuf *tb;
+ struct svcxdr_tmpbuf *tb;
- tb = kmalloc(sizeof(*tb), GFP_KERNEL);
+ tb = kmalloc(sizeof(*tb) + len, GFP_KERNEL);
if (!tb)
- return -ENOMEM;
- tb->buf = p;
- tb->release = release;
+ return NULL;
tb->next = argp->to_free;
argp->to_free = tb;
- return 0;
+ return tb->buf;
+}
+
+/*
+ * For xdr strings that need to be passed to other kernel api's
+ * as null-terminated strings.
+ *
+ * Note null-terminating in place usually isn't safe since the
+ * buffer might end on a page boundary.
+ */
+static char *
+svcxdr_dupstr(struct nfsd4_compoundargs *argp, void *buf, u32 len)
+{
+ char *p = svcxdr_tmpalloc(argp, len + 1);
+
+ if (!p)
+ return NULL;
+ memcpy(p, buf, len);
+ p[len] = '\0';
+ return p;
}
/**
@@ -217,19 +232,13 @@ defer_free(struct nfsd4_compoundargs *argp,
*/
static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes)
{
- if (p == argp->tmp) {
- p = kmemdup(argp->tmp, nbytes, GFP_KERNEL);
- if (!p)
- return NULL;
- } else {
- BUG_ON(p != argp->tmpp);
- argp->tmpp = NULL;
- }
- if (defer_free(argp, kfree, p)) {
- kfree(p);
+ void *ret;
+
+ ret = svcxdr_tmpalloc(argp, nbytes);
+ if (!ret)
return NULL;
- } else
- return (char *)p;
+ memcpy(ret, p, nbytes);
+ return ret;
}
static __be32
@@ -292,12 +301,10 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
if (nace > NFS4_ACL_MAX)
return nfserr_fbig;
- *acl = nfs4_acl_new(nace);
+ *acl = svcxdr_tmpalloc(argp, nfs4_acl_bytes(nace));
if (*acl == NULL)
return nfserr_jukebox;
- defer_free(argp, kfree, *acl);
-
(*acl)->naces = nace;
for (ace = (*acl)->aces; ace < (*acl)->aces + nace; ace++) {
READ_BUF(16); len += 16;
@@ -418,12 +425,10 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
return nfserr_badlabel;
len += (XDR_QUADLEN(dummy32) << 2);
READMEM(buf, dummy32);
- label->data = kzalloc(dummy32 + 1, GFP_KERNEL);
+ label->len = dummy32;
+ label->data = svcxdr_dupstr(argp, buf, dummy32);
if (!label->data)
return nfserr_jukebox;
- label->len = dummy32;
- defer_free(argp, kfree, label->data);
- memcpy(label->data, buf, dummy32);
}
#endif
@@ -598,20 +603,11 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create
switch (create->cr_type) {
case NF4LNK:
READ_BUF(4);
- create->cr_linklen = be32_to_cpup(p++);
- READ_BUF(create->cr_linklen);
- /*
- * The VFS will want a null-terminated string, and
- * null-terminating in place isn't safe since this might
- * end on a page boundary:
- */
- create->cr_linkname =
- kmalloc(create->cr_linklen + 1, GFP_KERNEL);
- if (!create->cr_linkname)
+ create->cr_datalen = be32_to_cpup(p++);
+ READ_BUF(create->cr_datalen);
+ create->cr_data = svcxdr_dupstr(argp, p, create->cr_datalen);
+ if (!create->cr_data)
return nfserr_jukebox;
- memcpy(create->cr_linkname, p, create->cr_linklen);
- create->cr_linkname[create->cr_linklen] = '\0';
- defer_free(argp, kfree, create->cr_linkname);
break;
case NF4BLK:
case NF4CHR:
@@ -1481,13 +1477,12 @@ nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_sta
INIT_LIST_HEAD(&test_stateid->ts_stateid_list);
for (i = 0; i < test_stateid->ts_num_ids; i++) {
- stateid = kmalloc(sizeof(struct nfsd4_test_stateid_id), GFP_KERNEL);
+ stateid = svcxdr_tmpalloc(argp, sizeof(*stateid));
if (!stateid) {
status = nfserrno(-ENOMEM);
goto out;
}
- defer_free(argp, kfree, stateid);
INIT_LIST_HEAD(&stateid->ts_id_list);
list_add_tail(&stateid->ts_id_list, &test_stateid->ts_stateid_list);
@@ -1640,7 +1635,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
goto xdr_error;
if (argp->opcnt > ARRAY_SIZE(argp->iops)) {
- argp->ops = kmalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL);
+ argp->ops = kzalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL);
if (!argp->ops) {
argp->ops = argp->iops;
dprintk("nfsd: couldn't allocate room for COMPOUND\n");
@@ -2662,6 +2657,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
struct xdr_stream *xdr = cd->xdr;
int start_offset = xdr->buf->len;
int cookie_offset;
+ u32 name_and_cookie;
int entry_bytes;
__be32 nfserr = nfserr_toosmall;
__be64 wire_offset;
@@ -2723,7 +2719,14 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
cd->rd_maxcount -= entry_bytes;
if (!cd->rd_dircount)
goto fail;
- cd->rd_dircount--;
+ /*
+ * RFC 3530 14.2.24 describes rd_dircount as only a "hint", so
+ * let's always let through the first entry, at least:
+ */
+ name_and_cookie = 4 * XDR_QUADLEN(namlen) + 8;
+ if (name_and_cookie > cd->rd_dircount && cd->cookie_offset)
+ goto fail;
+ cd->rd_dircount -= min(cd->rd_dircount, name_and_cookie);
cd->cookie_offset = cookie_offset;
skip_entry:
cd->common.err = nfs_ok;
@@ -3077,11 +3080,8 @@ static __be32 nfsd4_encode_splice_read(
__be32 nfserr;
__be32 *p = xdr->p - 2;
- /*
- * Don't inline pages unless we know there's room for eof,
- * count, and possible padding:
- */
- if (xdr->end - xdr->p < 3)
+ /* Make sure there will be room for padding if needed */
+ if (xdr->end - xdr->p < 1)
return nfserr_resource;
nfserr = nfsd_splice_read(read->rd_rqstp, file,
@@ -3104,7 +3104,8 @@ static __be32 nfsd4_encode_splice_read(
buf->page_len = maxcount;
buf->len += maxcount;
- xdr->page_ptr += (maxcount + PAGE_SIZE - 1) / PAGE_SIZE;
+ xdr->page_ptr += (buf->page_base + maxcount + PAGE_SIZE - 1)
+ / PAGE_SIZE;
/* Use rest of head for padding and remaining ops: */
buf->tail[0].iov_base = xdr->p;
@@ -3147,9 +3148,7 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
len = maxcount;
v = 0;
- thislen = (void *)xdr->end - (void *)xdr->p;
- if (len < thislen)
- thislen = len;
+ thislen = min_t(long, len, ((void *)xdr->end - (void *)xdr->p));
p = xdr_reserve_space(xdr, (thislen+3)&~3);
WARN_ON_ONCE(!p);
resp->rqstp->rq_vec[v].iov_base = p;
@@ -3216,10 +3215,8 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
xdr_commit_encode(xdr);
maxcount = svc_max_payload(resp->rqstp);
- if (maxcount > xdr->buf->buflen - xdr->buf->len)
- maxcount = xdr->buf->buflen - xdr->buf->len;
- if (maxcount > read->rd_length)
- maxcount = read->rd_length;
+ maxcount = min_t(unsigned long, maxcount, (xdr->buf->buflen - xdr->buf->len));
+ maxcount = min_t(unsigned long, maxcount, read->rd_length);
if (!read->rd_filp) {
err = nfsd_get_tmp_read_open(resp->rqstp, read->rd_fhp,
@@ -3333,6 +3330,10 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
}
maxcount = min_t(int, maxcount-16, bytes_left);
+ /* RFC 3530 14.2.24 allows us to ignore dircount when it's 0: */
+ if (!readdir->rd_dircount)
+ readdir->rd_dircount = INT_MAX;
+
readdir->xdr = xdr;
readdir->rd_maxcount = maxcount;
readdir->common.err = 0;
@@ -3937,8 +3938,6 @@ status:
*
* XDR note: do not encode rp->rp_buflen: the buffer contains the
* previously sent already encoded operation.
- *
- * called with nfs4_lock_state() held
*/
void
nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op)
@@ -3977,9 +3976,8 @@ int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp)
kfree(args->tmpp);
args->tmpp = NULL;
while (args->to_free) {
- struct tmpbuf *tb = args->to_free;
+ struct svcxdr_tmpbuf *tb = args->to_free;
args->to_free = tb->next;
- tb->release(tb->buf);
kfree(tb);
}
return 1;
@@ -4012,7 +4010,6 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
/*
* All that remains is to write the tag and operation count...
*/
- struct nfsd4_compound_state *cs = &resp->cstate;
struct xdr_buf *buf = resp->xdr.buf;
WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len +
@@ -4026,19 +4023,7 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
p += XDR_QUADLEN(resp->taglen);
*p++ = htonl(resp->opcnt);
- if (nfsd4_has_session(cs)) {
- struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
- struct nfs4_client *clp = cs->session->se_client;
- if (cs->status != nfserr_replay_cache) {
- nfsd4_store_cache_entry(resp);
- cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE;
- }
- /* Renew the clientid on success and on replay */
- spin_lock(&nn->client_lock);
- nfsd4_put_session(cs->session);
- spin_unlock(&nn->client_lock);
- put_client_renew(clp);
- }
+ nfsd4_sequence_done(resp);
return 1;
}
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 6040da8830ff..ff9567633245 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -221,7 +221,12 @@ static void
hash_refile(struct svc_cacherep *rp)
{
hlist_del_init(&rp->c_hash);
- hlist_add_head(&rp->c_hash, cache_hash + hash_32(rp->c_xid, maskbits));
+ /*
+ * No point in byte swapping c_xid since we're just using it to pick
+ * a hash bucket.
+ */
+ hlist_add_head(&rp->c_hash, cache_hash +
+ hash_32((__force u32)rp->c_xid, maskbits));
}
/*
@@ -356,7 +361,11 @@ nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum)
struct hlist_head *rh;
unsigned int entries = 0;
- rh = &cache_hash[hash_32(rqstp->rq_xid, maskbits)];
+ /*
+ * No point in byte swapping rq_xid since we're just using it to pick
+ * a hash bucket.
+ */
+ rh = &cache_hash[hash_32((__force u32)rqstp->rq_xid, maskbits)];
hlist_for_each_entry(rp, rh, c_hash) {
++entries;
if (nfsd_cache_match(rqstp, csum, rp)) {
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 51844048937f..4e042105fb6e 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -39,6 +39,7 @@ enum {
NFSD_Versions,
NFSD_Ports,
NFSD_MaxBlkSize,
+ NFSD_MaxConnections,
NFSD_SupportedEnctypes,
/*
* The below MUST come last. Otherwise we leave a hole in nfsd_files[]
@@ -62,6 +63,7 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size);
static ssize_t write_versions(struct file *file, char *buf, size_t size);
static ssize_t write_ports(struct file *file, char *buf, size_t size);
static ssize_t write_maxblksize(struct file *file, char *buf, size_t size);
+static ssize_t write_maxconn(struct file *file, char *buf, size_t size);
#ifdef CONFIG_NFSD_V4
static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
static ssize_t write_gracetime(struct file *file, char *buf, size_t size);
@@ -77,6 +79,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
[NFSD_Versions] = write_versions,
[NFSD_Ports] = write_ports,
[NFSD_MaxBlkSize] = write_maxblksize,
+ [NFSD_MaxConnections] = write_maxconn,
#ifdef CONFIG_NFSD_V4
[NFSD_Leasetime] = write_leasetime,
[NFSD_Gracetime] = write_gracetime,
@@ -369,8 +372,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
if (maxsize < NFS_FHSIZE)
return -EINVAL;
- if (maxsize > NFS3_FHSIZE)
- maxsize = NFS3_FHSIZE;
+ maxsize = min(maxsize, NFS3_FHSIZE);
if (qword_get(&mesg, mesg, size)>0)
return -EINVAL;
@@ -871,10 +873,8 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
/* force bsize into allowed range and
* required alignment.
*/
- if (bsize < 1024)
- bsize = 1024;
- if (bsize > NFSSVC_MAXBLKSIZE)
- bsize = NFSSVC_MAXBLKSIZE;
+ bsize = max_t(int, bsize, 1024);
+ bsize = min_t(int, bsize, NFSSVC_MAXBLKSIZE);
bsize &= ~(1024-1);
mutex_lock(&nfsd_mutex);
if (nn->nfsd_serv) {
@@ -889,6 +889,44 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
nfsd_max_blksize);
}
+/**
+ * write_maxconn - Set or report the current max number of connections
+ *
+ * Input:
+ * buf: ignored
+ * size: zero
+ * OR
+ *
+ * Input:
+ * buf: C string containing an unsigned
+ * integer value representing the new
+ * number of max connections
+ * size: non-zero length of C string in @buf
+ * Output:
+ * On success: passed-in buffer filled with '\n'-terminated C string
+ * containing numeric value of max_connections setting
+ * for this net namespace;
+ * return code is the size in bytes of the string
+ * On error: return code is zero or a negative errno value
+ */
+static ssize_t write_maxconn(struct file *file, char *buf, size_t size)
+{
+ char *mesg = buf;
+ struct net *net = file->f_dentry->d_sb->s_fs_info;
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ unsigned int maxconn = nn->max_connections;
+
+ if (size > 0) {
+ int rv = get_uint(&mesg, &maxconn);
+
+ if (rv)
+ return rv;
+ nn->max_connections = maxconn;
+ }
+
+ return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%u\n", maxconn);
+}
+
#ifdef CONFIG_NFSD_V4
static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size,
time_t *time, struct nfsd_net *nn)
@@ -1064,6 +1102,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
[NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO},
[NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
+ [NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO},
#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE)
[NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO},
#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index ec8393418154..e883a5868be6 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -162,7 +162,14 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
/* deprecated, convert to type 3 */
len = key_len(FSID_ENCODE_DEV)/4;
fh->fh_fsid_type = FSID_ENCODE_DEV;
- fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl(fh->fh_fsid[0]), ntohl(fh->fh_fsid[1])));
+ /*
+ * struct knfsd_fh uses host-endian fields, which are
+ * sometimes used to hold net-endian values. This
+ * confuses sparse, so we must use __force here to
+ * keep it from complaining.
+ */
+ fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl((__force __be32)fh->fh_fsid[0]),
+ ntohl((__force __be32)fh->fh_fsid[1])));
fh->fh_fsid[1] = fh->fh_fsid[2];
}
data_left -= len;
@@ -539,8 +546,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
dentry);
fhp->fh_dentry = dget(dentry); /* our internal copy */
- fhp->fh_export = exp;
- cache_get(&exp->h);
+ fhp->fh_export = exp_get(exp);
if (fhp->fh_handle.fh_version == 0xca) {
/* old style filehandle please */
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index 2e89e70ac15c..08236d70c667 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -73,8 +73,15 @@ enum fsid_source {
extern enum fsid_source fsid_source(struct svc_fh *fhp);
-/* This might look a little large to "inline" but in all calls except
+/*
+ * This might look a little large to "inline" but in all calls except
* one, 'vers' is constant so moste of the function disappears.
+ *
+ * In some cases the values are considered to be host endian and in
+ * others, net endian. fsidv is always considered to be u32 as the
+ * callers don't know which it will be. So we must use __force to keep
+ * sparse from complaining. Since these values are opaque to the
+ * client, that shouldn't be a problem.
*/
static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino,
u32 fsid, unsigned char *uuid)
@@ -82,7 +89,7 @@ static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino,
u32 *up;
switch(vers) {
case FSID_DEV:
- fsidv[0] = htonl((MAJOR(dev)<<16) |
+ fsidv[0] = (__force __u32)htonl((MAJOR(dev)<<16) |
MINOR(dev));
fsidv[1] = ino_t_to_u32(ino);
break;
@@ -90,8 +97,8 @@ static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino,
fsidv[0] = fsid;
break;
case FSID_MAJOR_MINOR:
- fsidv[0] = htonl(MAJOR(dev));
- fsidv[1] = htonl(MINOR(dev));
+ fsidv[0] = (__force __u32)htonl(MAJOR(dev));
+ fsidv[1] = (__force __u32)htonl(MINOR(dev));
fsidv[2] = ino_t_to_u32(ino);
break;
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 54c6b3d3cc79..b8680738f588 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -403,12 +403,13 @@ nfsd_proc_symlink(struct svc_rqst *rqstp, struct nfsd_symlinkargs *argp,
fh_init(&newfh, NFS_FHSIZE);
/*
- * Create the link, look up new file and set attrs.
+ * Crazy hack: the request fits in a page, and already-decoded
+ * attributes follow argp->tname, so it's safe to just write a
+ * null to ensure it's null-terminated:
*/
+ argp->tname[argp->tlen] = '\0';
nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen,
- argp->tname, argp->tlen,
- &newfh, &argp->attrs);
-
+ argp->tname, &newfh);
fh_put(&argp->ffh);
fh_put(&newfh);
@@ -716,6 +717,7 @@ nfserrno (int errno)
{ nfserr_noent, -ENOENT },
{ nfserr_io, -EIO },
{ nfserr_nxio, -ENXIO },
+ { nfserr_fbig, -E2BIG },
{ nfserr_acces, -EACCES },
{ nfserr_exist, -EEXIST },
{ nfserr_xdev, -EXDEV },
@@ -743,6 +745,7 @@ nfserrno (int errno)
{ nfserr_notsupp, -EOPNOTSUPP },
{ nfserr_toosmall, -ETOOSMALL },
{ nfserr_serverfault, -ESERVERFAULT },
+ { nfserr_serverfault, -ENFILE },
};
int i;
@@ -750,7 +753,7 @@ nfserrno (int errno)
if (nfs_errtbl[i].syserr == errno)
return nfs_errtbl[i].nfserr;
}
- printk (KERN_INFO "nfsd: non-standard errno: %d\n", errno);
+ WARN(1, "nfsd: non-standard errno: %d\n", errno);
return nfserr_io;
}
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 1879e43f2868..752d56bbe0ba 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -221,7 +221,8 @@ static int nfsd_startup_generic(int nrservs)
*/
ret = nfsd_racache_init(2*nrservs);
if (ret)
- return ret;
+ goto dec_users;
+
ret = nfs4_state_start();
if (ret)
goto out_racache;
@@ -229,6 +230,8 @@ static int nfsd_startup_generic(int nrservs)
out_racache:
nfsd_racache_shutdown();
+dec_users:
+ nfsd_users--;
return ret;
}
@@ -405,6 +408,7 @@ int nfsd_create_serv(struct net *net)
if (nn->nfsd_serv == NULL)
return -ENOMEM;
+ nn->nfsd_serv->sv_maxconn = nn->max_connections;
error = svc_bind(nn->nfsd_serv, net);
if (error < 0) {
svc_destroy(nn->nfsd_serv);
@@ -469,8 +473,7 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
/* enforce a global maximum number of threads */
tot = 0;
for (i = 0; i < n; i++) {
- if (nthreads[i] > NFSD_MAXSERVS)
- nthreads[i] = NFSD_MAXSERVS;
+ nthreads[i] = min(nthreads[i], NFSD_MAXSERVS);
tot += nthreads[i];
}
if (tot > NFSD_MAXSERVS) {
@@ -519,11 +522,11 @@ nfsd_svc(int nrservs, struct net *net)
mutex_lock(&nfsd_mutex);
dprintk("nfsd: creating service\n");
- if (nrservs <= 0)
- nrservs = 0;
- if (nrservs > NFSD_MAXSERVS)
- nrservs = NFSD_MAXSERVS;
+
+ nrservs = max(nrservs, 0);
+ nrservs = min(nrservs, NFSD_MAXSERVS);
error = 0;
+
if (nrservs == 0 && nn->nfsd_serv == NULL)
goto out;
@@ -564,6 +567,7 @@ nfsd(void *vrqstp)
struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp;
struct svc_xprt *perm_sock = list_entry(rqstp->rq_server->sv_permsocks.next, typeof(struct svc_xprt), xpt_list);
struct net *net = perm_sock->xpt_net;
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
int err;
/* Lock module and set up kernel thread */
@@ -597,6 +601,9 @@ nfsd(void *vrqstp)
* The main request loop
*/
for (;;) {
+ /* Update sv_maxconn if it has changed */
+ rqstp->rq_server->sv_maxconn = nn->max_connections;
+
/*
* Find a socket with data available and call its
* recvfrom routine.
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 1ac306b769df..412d7061f9e5 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -257,8 +257,7 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
len = args->count = ntohl(*p++);
p++; /* totalcount - unused */
- if (len > NFSSVC_MAXBLKSIZE_V2)
- len = NFSSVC_MAXBLKSIZE_V2;
+ len = min_t(unsigned int, len, NFSSVC_MAXBLKSIZE_V2);
/* set up somewhere to store response.
* We take pages, put them on reslist and include in iovec
@@ -268,7 +267,7 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
struct page *p = *(rqstp->rq_next_page++);
rqstp->rq_vec[v].iov_base = page_address(p);
- rqstp->rq_vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE;
+ rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE);
len -= rqstp->rq_vec[v].iov_len;
v++;
}
@@ -400,9 +399,7 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
return 0;
args->cookie = ntohl(*p++);
args->count = ntohl(*p++);
- if (args->count > PAGE_SIZE)
- args->count = PAGE_SIZE;
-
+ args->count = min_t(u32, args->count, PAGE_SIZE);
args->buffer = page_address(*(rqstp->rq_next_page++));
return xdr_argsize_check(rqstp, p);
@@ -516,10 +513,11 @@ nfssvc_encode_entry(void *ccdv, const char *name,
}
if (cd->offset)
*cd->offset = htonl(offset);
- if (namlen > NFS2_MAXNAMLEN)
- namlen = NFS2_MAXNAMLEN;/* truncate filename */
+ /* truncate filename */
+ namlen = min(namlen, NFS2_MAXNAMLEN);
slen = XDR_QUADLEN(namlen);
+
if ((buflen = cd->buflen - slen - 4) < 0) {
cd->common.err = nfserr_toosmall;
return -EINVAL;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 374c66283ac5..4a89e00d7461 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -72,7 +72,13 @@ struct nfsd4_callback {
bool cb_done;
};
+/*
+ * A core object that represents a "common" stateid. These are generally
+ * embedded within the different (more specific) stateid objects and contain
+ * fields that are of general use to any stateid.
+ */
struct nfs4_stid {
+ atomic_t sc_count;
#define NFS4_OPEN_STID 1
#define NFS4_LOCK_STID 2
#define NFS4_DELEG_STID 4
@@ -80,22 +86,43 @@ struct nfs4_stid {
#define NFS4_CLOSED_STID 8
/* For a deleg stateid kept around only to process free_stateid's: */
#define NFS4_REVOKED_DELEG_STID 16
+#define NFS4_CLOSED_DELEG_STID 32
unsigned char sc_type;
stateid_t sc_stateid;
struct nfs4_client *sc_client;
+ struct nfs4_file *sc_file;
+ void (*sc_free)(struct nfs4_stid *);
};
+/*
+ * Represents a delegation stateid. The nfs4_client holds references to these
+ * and they are put when it is being destroyed or when the delegation is
+ * returned by the client:
+ *
+ * o 1 reference as long as a delegation is still in force (taken when it's
+ * alloc'd, put when it's returned or revoked)
+ *
+ * o 1 reference as long as a recall rpc is in progress (taken when the lease
+ * is broken, put when the rpc exits)
+ *
+ * o 1 more ephemeral reference for each nfsd thread currently doing something
+ * with that delegation without holding the cl_lock
+ *
+ * If the server attempts to recall a delegation and the client doesn't do so
+ * before a timeout, the server may also revoke the delegation. In that case,
+ * the object will either be destroyed (v4.0) or moved to a per-client list of
+ * revoked delegations (v4.1+).
+ *
+ * This object is a superset of the nfs4_stid.
+ */
struct nfs4_delegation {
struct nfs4_stid dl_stid; /* must be first field */
struct list_head dl_perfile;
struct list_head dl_perclnt;
struct list_head dl_recall_lru; /* delegation recalled */
- atomic_t dl_count; /* ref count */
- struct nfs4_file *dl_file;
u32 dl_type;
time_t dl_time;
/* For recall: */
- struct knfsd_fh dl_fh;
int dl_retries;
struct nfsd4_callback dl_recall;
};
@@ -194,6 +221,11 @@ struct nfsd4_conn {
unsigned char cn_flags;
};
+/*
+ * Representation of a v4.1+ session. These are refcounted in a similar fashion
+ * to the nfs4_client. References are only taken when the server is actively
+ * working on the object (primarily during the processing of compounds).
+ */
struct nfsd4_session {
atomic_t se_ref;
struct list_head se_hash; /* hash by sessionid */
@@ -212,8 +244,6 @@ struct nfsd4_session {
struct nfsd4_slot *se_slots[]; /* forward channel slots */
};
-extern void nfsd4_put_session(struct nfsd4_session *ses);
-
/* formatted contents of nfs4_sessionid */
struct nfsd4_sessionid {
clientid_t clientid;
@@ -225,17 +255,35 @@ struct nfsd4_sessionid {
/*
* struct nfs4_client - one per client. Clientids live here.
- * o Each nfs4_client is hashed by clientid.
*
- * o Each nfs4_clients is also hashed by name
- * (the opaque quantity initially sent by the client to identify itself).
+ * The initial object created by an NFS client using SETCLIENTID (for NFSv4.0)
+ * or EXCHANGE_ID (for NFSv4.1+). These objects are refcounted and timestamped.
+ * Each nfsd_net_ns object contains a set of these and they are tracked via
+ * short and long form clientid. They are hashed and searched for under the
+ * per-nfsd_net client_lock spinlock.
+ *
+ * References to it are only held during the processing of compounds, and in
+ * certain other operations. In their "resting state" they have a refcount of
+ * 0. If they are not renewed within a lease period, they become eligible for
+ * destruction by the laundromat.
+ *
+ * These objects can also be destroyed prematurely by the fault injection code,
+ * or if the client sends certain forms of SETCLIENTID or EXCHANGE_ID updates.
+ * Care is taken *not* to do this however when the objects have an elevated
+ * refcount.
+ *
+ * o Each nfs4_client is hashed by clientid
+ *
+ * o Each nfs4_clients is also hashed by name (the opaque quantity initially
+ * sent by the client to identify itself).
*
- * o cl_perclient list is used to ensure no dangling stateowner references
- * when we expire the nfs4_client
+ * o cl_perclient list is used to ensure no dangling stateowner references
+ * when we expire the nfs4_client
*/
struct nfs4_client {
struct list_head cl_idhash; /* hash by cl_clientid.id */
struct rb_node cl_namenode; /* link into by-name trees */
+ struct list_head *cl_ownerstr_hashtbl;
struct list_head cl_openowners;
struct idr cl_stateids; /* stateid lookup */
struct list_head cl_delegations;
@@ -329,21 +377,43 @@ struct nfs4_replay {
unsigned int rp_buflen;
char *rp_buf;
struct knfsd_fh rp_openfh;
+ struct mutex rp_mutex;
char rp_ibuf[NFSD4_REPLAY_ISIZE];
};
+struct nfs4_stateowner;
+
+struct nfs4_stateowner_operations {
+ void (*so_unhash)(struct nfs4_stateowner *);
+ void (*so_free)(struct nfs4_stateowner *);
+};
+
+/*
+ * A core object that represents either an open or lock owner. The object and
+ * lock owner objects have one of these embedded within them. Refcounts and
+ * other fields common to both owner types are contained within these
+ * structures.
+ */
struct nfs4_stateowner {
- struct list_head so_strhash; /* hash by op_name */
- struct list_head so_stateids;
- struct nfs4_client * so_client;
- /* after increment in ENCODE_SEQID_OP_TAIL, represents the next
+ struct list_head so_strhash;
+ struct list_head so_stateids;
+ struct nfs4_client *so_client;
+ const struct nfs4_stateowner_operations *so_ops;
+ /* after increment in nfsd4_bump_seqid, represents the next
* sequence id expected from the client: */
- u32 so_seqid;
- struct xdr_netobj so_owner; /* open owner name */
- struct nfs4_replay so_replay;
- bool so_is_open_owner;
+ atomic_t so_count;
+ u32 so_seqid;
+ struct xdr_netobj so_owner; /* open owner name */
+ struct nfs4_replay so_replay;
+ bool so_is_open_owner;
};
+/*
+ * When a file is opened, the client provides an open state owner opaque string
+ * that indicates the "owner" of that open. These objects are refcounted.
+ * References to it are held by each open state associated with it. This object
+ * is a superset of the nfs4_stateowner struct.
+ */
struct nfs4_openowner {
struct nfs4_stateowner oo_owner; /* must be first field */
struct list_head oo_perclient;
@@ -358,15 +428,17 @@ struct nfs4_openowner {
struct nfs4_ol_stateid *oo_last_closed_stid;
time_t oo_time; /* time of placement on so_close_lru */
#define NFS4_OO_CONFIRMED 1
-#define NFS4_OO_NEW 4
unsigned char oo_flags;
};
+/*
+ * Represents a generic "lockowner". Similar to an openowner. References to it
+ * are held by the lock stateids that are created on its behalf. This object is
+ * a superset of the nfs4_stateowner struct (or would be if it needed any extra
+ * fields).
+ */
struct nfs4_lockowner {
struct nfs4_stateowner lo_owner; /* must be first element */
- struct list_head lo_owner_ino_hash; /* hash by owner,file */
- struct list_head lo_perstateid;
- struct list_head lo_list; /* for temporary uses */
};
static inline struct nfs4_openowner * openowner(struct nfs4_stateowner *so)
@@ -379,9 +451,17 @@ static inline struct nfs4_lockowner * lockowner(struct nfs4_stateowner *so)
return container_of(so, struct nfs4_lockowner, lo_owner);
}
-/* nfs4_file: a file opened by some number of (open) nfs4_stateowners. */
+/*
+ * nfs4_file: a file opened by some number of (open) nfs4_stateowners.
+ *
+ * These objects are global. nfsd only keeps one instance of a nfs4_file per
+ * inode (though it may keep multiple file descriptors open per inode). These
+ * are tracked in the file_hashtbl which is protected by the state_lock
+ * spinlock.
+ */
struct nfs4_file {
atomic_t fi_ref;
+ spinlock_t fi_lock;
struct hlist_node fi_hash; /* hash by "struct inode *" */
struct list_head fi_stateids;
struct list_head fi_delegations;
@@ -395,49 +475,36 @@ struct nfs4_file {
* + 1 to both of the above if NFS4_SHARE_ACCESS_BOTH is set.
*/
atomic_t fi_access[2];
+ u32 fi_share_deny;
struct file *fi_deleg_file;
struct file_lock *fi_lease;
atomic_t fi_delegees;
- struct inode *fi_inode;
+ struct knfsd_fh fi_fhandle;
bool fi_had_conflict;
};
-/* XXX: for first cut may fall back on returning file that doesn't work
- * at all? */
-static inline struct file *find_writeable_file(struct nfs4_file *f)
-{
- if (f->fi_fds[O_WRONLY])
- return f->fi_fds[O_WRONLY];
- return f->fi_fds[O_RDWR];
-}
-
-static inline struct file *find_readable_file(struct nfs4_file *f)
-{
- if (f->fi_fds[O_RDONLY])
- return f->fi_fds[O_RDONLY];
- return f->fi_fds[O_RDWR];
-}
-
-static inline struct file *find_any_file(struct nfs4_file *f)
-{
- if (f->fi_fds[O_RDWR])
- return f->fi_fds[O_RDWR];
- else if (f->fi_fds[O_WRONLY])
- return f->fi_fds[O_WRONLY];
- else
- return f->fi_fds[O_RDONLY];
-}
-
-/* "ol" stands for "Open or Lock". Better suggestions welcome. */
+/*
+ * A generic struct representing either a open or lock stateid. The nfs4_client
+ * holds a reference to each of these objects, and they in turn hold a
+ * reference to their respective stateowners. The client's reference is
+ * released in response to a close or unlock (depending on whether it's an open
+ * or lock stateid) or when the client is being destroyed.
+ *
+ * In the case of v4.0 open stateids, these objects are preserved for a little
+ * while after close in order to handle CLOSE replays. Those are eventually
+ * reclaimed via a LRU scheme by the laundromat.
+ *
+ * This object is a superset of the nfs4_stid. "ol" stands for "Open or Lock".
+ * Better suggestions welcome.
+ */
struct nfs4_ol_stateid {
struct nfs4_stid st_stid; /* must be first field */
struct list_head st_perfile;
struct list_head st_perstateowner;
- struct list_head st_lockowners;
+ struct list_head st_locks;
struct nfs4_stateowner * st_stateowner;
- struct nfs4_file * st_file;
- unsigned long st_access_bmap;
- unsigned long st_deny_bmap;
+ unsigned char st_access_bmap;
+ unsigned char st_deny_bmap;
struct nfs4_ol_stateid * st_openstp;
};
@@ -456,15 +523,16 @@ struct nfsd_net;
extern __be32 nfs4_preprocess_stateid_op(struct net *net,
struct nfsd4_compound_state *cstate,
stateid_t *stateid, int flags, struct file **filp);
-extern void nfs4_lock_state(void);
-extern void nfs4_unlock_state(void);
+void nfs4_put_stid(struct nfs4_stid *s);
void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *);
extern void nfs4_release_reclaim(struct nfsd_net *);
extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir,
struct nfsd_net *nn);
-extern __be32 nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn);
+extern __be32 nfs4_check_open_reclaim(clientid_t *clid,
+ struct nfsd4_compound_state *cstate, struct nfsd_net *nn);
extern int set_callback_cred(void);
-extern void nfsd4_init_callback(struct nfsd4_callback *);
+void nfsd4_run_cb_null(struct work_struct *w);
+void nfsd4_run_cb_recall(struct work_struct *w);
extern void nfsd4_probe_callback(struct nfs4_client *clp);
extern void nfsd4_probe_callback_sync(struct nfs4_client *clp);
extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
@@ -472,11 +540,10 @@ extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
extern int nfsd4_create_callback_queue(void);
extern void nfsd4_destroy_callback_queue(void);
extern void nfsd4_shutdown_callback(struct nfs4_client *);
-extern void nfs4_put_delegation(struct nfs4_delegation *dp);
+extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp);
extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name,
struct nfsd_net *nn);
extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn);
-extern void put_client_renew(struct nfs4_client *clp);
/* nfs4recover operations */
extern int nfsd4_client_tracking_init(struct net *net);
@@ -490,19 +557,24 @@ extern void nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time);
#ifdef CONFIG_NFSD_FAULT_INJECTION
int nfsd_fault_inject_init(void);
void nfsd_fault_inject_cleanup(void);
-u64 nfsd_for_n_state(u64, u64 (*)(struct nfs4_client *, u64));
-struct nfs4_client *nfsd_find_client(struct sockaddr_storage *, size_t);
-
-u64 nfsd_forget_client(struct nfs4_client *, u64);
-u64 nfsd_forget_client_locks(struct nfs4_client*, u64);
-u64 nfsd_forget_client_openowners(struct nfs4_client *, u64);
-u64 nfsd_forget_client_delegations(struct nfs4_client *, u64);
-u64 nfsd_recall_client_delegations(struct nfs4_client *, u64);
-
-u64 nfsd_print_client(struct nfs4_client *, u64);
-u64 nfsd_print_client_locks(struct nfs4_client *, u64);
-u64 nfsd_print_client_openowners(struct nfs4_client *, u64);
-u64 nfsd_print_client_delegations(struct nfs4_client *, u64);
+
+u64 nfsd_inject_print_clients(void);
+u64 nfsd_inject_forget_client(struct sockaddr_storage *, size_t);
+u64 nfsd_inject_forget_clients(u64);
+
+u64 nfsd_inject_print_locks(void);
+u64 nfsd_inject_forget_client_locks(struct sockaddr_storage *, size_t);
+u64 nfsd_inject_forget_locks(u64);
+
+u64 nfsd_inject_print_openowners(void);
+u64 nfsd_inject_forget_client_openowners(struct sockaddr_storage *, size_t);
+u64 nfsd_inject_forget_openowners(u64);
+
+u64 nfsd_inject_print_delegations(void);
+u64 nfsd_inject_forget_client_delegations(struct sockaddr_storage *, size_t);
+u64 nfsd_inject_forget_delegations(u64);
+u64 nfsd_inject_recall_client_delegations(struct sockaddr_storage *, size_t);
+u64 nfsd_inject_recall_delegations(u64);
#else /* CONFIG_NFSD_FAULT_INJECTION */
static inline int nfsd_fault_inject_init(void) { return 0; }
static inline void nfsd_fault_inject_cleanup(void) {}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index d49c778faecb..6ab077bb897e 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -189,8 +189,7 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name);
dparent = fhp->fh_dentry;
- exp = fhp->fh_export;
- exp_get(exp);
+ exp = exp_get(fhp->fh_export);
/* Lookup the name, but don't follow links */
if (isdotent(name, len)) {
@@ -464,7 +463,7 @@ out_put_write_access:
if (size_change)
put_write_access(inode);
if (!err)
- commit_metadata(fhp);
+ err = nfserrno(commit_metadata(fhp));
out:
return err;
}
@@ -820,7 +819,8 @@ static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
}
-__be32 nfsd_finish_read(struct file *file, unsigned long *count, int host_err)
+static __be32
+nfsd_finish_read(struct file *file, unsigned long *count, int host_err)
{
if (host_err >= 0) {
nfsdstats.io_read += host_err;
@@ -831,7 +831,7 @@ __be32 nfsd_finish_read(struct file *file, unsigned long *count, int host_err)
return nfserrno(host_err);
}
-int nfsd_splice_read(struct svc_rqst *rqstp,
+__be32 nfsd_splice_read(struct svc_rqst *rqstp,
struct file *file, loff_t offset, unsigned long *count)
{
struct splice_desc sd = {
@@ -847,7 +847,7 @@ int nfsd_splice_read(struct svc_rqst *rqstp,
return nfsd_finish_read(file, count, host_err);
}
-int nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen,
+__be32 nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen,
unsigned long *count)
{
mm_segment_t oldfs;
@@ -1121,7 +1121,8 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp,
iap->ia_valid &= ~(ATTR_UID|ATTR_GID);
if (iap->ia_valid)
return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
- return 0;
+ /* Callers expect file metadata to be committed here */
+ return nfserrno(commit_metadata(resfhp));
}
/* HPUX client sometimes creates a file in mode 000, and sets size to 0.
@@ -1253,9 +1254,10 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
err = nfsd_create_setattr(rqstp, resfhp, iap);
/*
- * nfsd_setattr already committed the child. Transactional filesystems
- * had a chance to commit changes for both parent and child
- * simultaneously making the following commit_metadata a noop.
+ * nfsd_create_setattr already committed the child. Transactional
+ * filesystems had a chance to commit changes for both parent and
+ * child * simultaneously making the following commit_metadata a
+ * noop.
*/
err2 = nfserrno(commit_metadata(fhp));
if (err2)
@@ -1426,7 +1428,8 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
err = nfsd_create_setattr(rqstp, resfhp, iap);
/*
- * nfsd_setattr already committed the child (and possibly also the parent).
+ * nfsd_create_setattr already committed the child
+ * (and possibly also the parent).
*/
if (!err)
err = nfserrno(commit_metadata(fhp));
@@ -1504,16 +1507,15 @@ out_nfserr:
__be32
nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
char *fname, int flen,
- char *path, int plen,
- struct svc_fh *resfhp,
- struct iattr *iap)
+ char *path,
+ struct svc_fh *resfhp)
{
struct dentry *dentry, *dnew;
__be32 err, cerr;
int host_err;
err = nfserr_noent;
- if (!flen || !plen)
+ if (!flen || path[0] == '\0')
goto out;
err = nfserr_exist;
if (isdotent(fname, flen))
@@ -1534,18 +1536,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (IS_ERR(dnew))
goto out_nfserr;
- if (unlikely(path[plen] != 0)) {
- char *path_alloced = kmalloc(plen+1, GFP_KERNEL);
- if (path_alloced == NULL)
- host_err = -ENOMEM;
- else {
- strncpy(path_alloced, path, plen);
- path_alloced[plen] = 0;
- host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced);
- kfree(path_alloced);
- }
- } else
- host_err = vfs_symlink(dentry->d_inode, dnew, path);
+ host_err = vfs_symlink(dentry->d_inode, dnew, path);
err = nfserrno(host_err);
if (!err)
err = nfserrno(commit_metadata(fhp));
@@ -2093,8 +2084,7 @@ nfsd_racache_init(int cache_size)
if (raparm_hash[0].pb_head)
return 0;
nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE);
- if (nperbucket < 2)
- nperbucket = 2;
+ nperbucket = max(2, nperbucket);
cache_size = nperbucket * RAPARM_HASH_SIZE;
dprintk("nfsd: allocating %d readahead buffers.\n", cache_size);
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 91b6ae3f658b..c2ff3f14e5f6 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -74,9 +74,9 @@ struct raparms;
__be32 nfsd_get_tmp_read_open(struct svc_rqst *, struct svc_fh *,
struct file **, struct raparms **);
void nfsd_put_tmp_read_open(struct file *, struct raparms *);
-int nfsd_splice_read(struct svc_rqst *,
+__be32 nfsd_splice_read(struct svc_rqst *,
struct file *, loff_t, unsigned long *);
-int nfsd_readv(struct file *, loff_t, struct kvec *, int,
+__be32 nfsd_readv(struct file *, loff_t, struct kvec *, int,
unsigned long *);
__be32 nfsd_read(struct svc_rqst *, struct svc_fh *,
loff_t, struct kvec *, int, unsigned long *);
@@ -85,8 +85,8 @@ __be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *,
__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *,
char *, int *);
__be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *,
- char *name, int len, char *path, int plen,
- struct svc_fh *res, struct iattr *);
+ char *name, int len, char *path,
+ struct svc_fh *res);
__be32 nfsd_link(struct svc_rqst *, struct svc_fh *,
char *, int, struct svc_fh *);
__be32 nfsd_rename(struct svc_rqst *,
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 18cbb6d9c8a9..465e7799742a 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -55,6 +55,7 @@ struct nfsd4_compound_state {
struct svc_fh current_fh;
struct svc_fh save_fh;
struct nfs4_stateowner *replay_owner;
+ struct nfs4_client *clp;
/* For sessions DRC */
struct nfsd4_session *session;
struct nfsd4_slot *slot;
@@ -107,8 +108,8 @@ struct nfsd4_create {
u32 cr_type; /* request */
union { /* request */
struct {
- u32 namelen;
- char *name;
+ u32 datalen;
+ char *data;
} link; /* NF4LNK */
struct {
u32 specdata1;
@@ -121,8 +122,8 @@ struct nfsd4_create {
struct nfs4_acl *cr_acl;
struct xdr_netobj cr_label;
};
-#define cr_linklen u.link.namelen
-#define cr_linkname u.link.name
+#define cr_datalen u.link.datalen
+#define cr_data u.link.data
#define cr_specdata1 u.dev.specdata1
#define cr_specdata2 u.dev.specdata2
@@ -478,6 +479,14 @@ struct nfsd4_op {
bool nfsd4_cache_this_op(struct nfsd4_op *);
+/*
+ * Memory needed just for the duration of processing one compound:
+ */
+struct svcxdr_tmpbuf {
+ struct svcxdr_tmpbuf *next;
+ char buf[];
+};
+
struct nfsd4_compoundargs {
/* scratch variables for XDR decode */
__be32 * p;
@@ -486,11 +495,7 @@ struct nfsd4_compoundargs {
int pagelen;
__be32 tmp[8];
__be32 * tmpp;
- struct tmpbuf {
- struct tmpbuf *next;
- void (*release)(const void *);
- void *buf;
- } *to_free;
+ struct svcxdr_tmpbuf *to_free;
struct svc_rqst *rqstp;
@@ -574,7 +579,6 @@ extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp,
extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
struct nfsd4_compound_state *,
struct nfsd4_setclientid_confirm *setclientid_confirm);
-extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp);
extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp,
struct nfsd4_compound_state *, struct nfsd4_exchange_id *);
extern __be32 nfsd4_backchannel_ctl(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_backchannel_ctl *);
@@ -585,6 +589,7 @@ extern __be32 nfsd4_create_session(struct svc_rqst *,
extern __be32 nfsd4_sequence(struct svc_rqst *,
struct nfsd4_compound_state *,
struct nfsd4_sequence *);
+extern void nfsd4_sequence_done(struct nfsd4_compoundres *resp);
extern __be32 nfsd4_destroy_session(struct svc_rqst *,
struct nfsd4_compound_state *,
struct nfsd4_destroy_session *);
@@ -594,7 +599,9 @@ extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *,
struct nfsd4_open *open, struct nfsd_net *nn);
extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
struct svc_fh *current_fh, struct nfsd4_open *open);
-extern void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status);
+extern void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate);
+extern void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
+ struct nfsd4_open *open, __be32 status);
extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp,
struct nfsd4_compound_state *, struct nfsd4_open_confirm *oc);
extern __be32 nfsd4_close(struct svc_rqst *rqstp,
@@ -625,6 +632,7 @@ extern __be32 nfsd4_test_stateid(struct svc_rqst *rqstp,
extern __be32 nfsd4_free_stateid(struct svc_rqst *rqstp,
struct nfsd4_compound_state *, struct nfsd4_free_stateid *free_stateid);
extern void nfsd4_bump_seqid(struct nfsd4_compound_state *, __be32 nfserr);
+
#endif
/*