diff options
author | 2014-11-19 21:32:12 +1100 | |
---|---|---|
committer | 2014-11-19 21:32:12 +1100 | |
commit | b10778a00d40b3d9fdaaf5891e802794781ff71c (patch) | |
tree | 6ba4cbac86eecedc3f30650e7f764ecf00c83898 /fs/nfs | |
parent | integrity: do zero padding of the key id (diff) | |
parent | Linux 3.17 (diff) | |
download | wireguard-linux-b10778a00d40b3d9fdaaf5891e802794781ff71c.tar.xz wireguard-linux-b10778a00d40b3d9fdaaf5891e802794781ff71c.zip |
Merge commit 'v3.17' into next
Diffstat (limited to '')
59 files changed, 3930 insertions, 2355 deletions
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 9b431f44fad9..cbb1797149d5 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -210,8 +210,7 @@ static void bl_end_io_read(struct bio *bio, int err) SetPageUptodate(bvec->bv_page); if (err) { - struct nfs_pgio_data *rdata = par->data; - struct nfs_pgio_header *header = rdata->header; + struct nfs_pgio_header *header = par->data; if (!header->pnfs_error) header->pnfs_error = -EIO; @@ -224,43 +223,44 @@ static void bl_end_io_read(struct bio *bio, int err) static void bl_read_cleanup(struct work_struct *work) { struct rpc_task *task; - struct nfs_pgio_data *rdata; + struct nfs_pgio_header *hdr; dprintk("%s enter\n", __func__); task = container_of(work, struct rpc_task, u.tk_work); - rdata = container_of(task, struct nfs_pgio_data, task); - pnfs_ld_read_done(rdata); + hdr = container_of(task, struct nfs_pgio_header, task); + pnfs_ld_read_done(hdr); } static void bl_end_par_io_read(void *data, int unused) { - struct nfs_pgio_data *rdata = data; + struct nfs_pgio_header *hdr = data; - rdata->task.tk_status = rdata->header->pnfs_error; - INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup); - schedule_work(&rdata->task.u.tk_work); + hdr->task.tk_status = hdr->pnfs_error; + INIT_WORK(&hdr->task.u.tk_work, bl_read_cleanup); + schedule_work(&hdr->task.u.tk_work); } static enum pnfs_try_status -bl_read_pagelist(struct nfs_pgio_data *rdata) +bl_read_pagelist(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *header = rdata->header; + struct nfs_pgio_header *header = hdr; int i, hole; struct bio *bio = NULL; struct pnfs_block_extent *be = NULL, *cow_read = NULL; sector_t isect, extent_length = 0; struct parallel_io *par; - loff_t f_offset = rdata->args.offset; - size_t bytes_left = rdata->args.count; + loff_t f_offset = hdr->args.offset; + size_t bytes_left = hdr->args.count; unsigned int pg_offset, pg_len; - struct page **pages = rdata->args.pages; - 
int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT; + struct page **pages = hdr->args.pages; + int pg_index = hdr->args.pgbase >> PAGE_CACHE_SHIFT; const bool is_dio = (header->dreq != NULL); dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, - rdata->pages.npages, f_offset, (unsigned int)rdata->args.count); + hdr->page_array.npages, f_offset, + (unsigned int)hdr->args.count); - par = alloc_parallel(rdata); + par = alloc_parallel(hdr); if (!par) goto use_mds; par->pnfs_callback = bl_end_par_io_read; @@ -268,7 +268,7 @@ bl_read_pagelist(struct nfs_pgio_data *rdata) isect = (sector_t) (f_offset >> SECTOR_SHIFT); /* Code assumes extents are page-aligned */ - for (i = pg_index; i < rdata->pages.npages; i++) { + for (i = pg_index; i < hdr->page_array.npages; i++) { if (!extent_length) { /* We've used up the previous extent */ bl_put_extent(be); @@ -317,7 +317,8 @@ bl_read_pagelist(struct nfs_pgio_data *rdata) struct pnfs_block_extent *be_read; be_read = (hole && cow_read) ? cow_read : be; - bio = do_add_page_to_bio(bio, rdata->pages.npages - i, + bio = do_add_page_to_bio(bio, + hdr->page_array.npages - i, READ, isect, pages[i], be_read, bl_end_io_read, par, @@ -332,10 +333,10 @@ bl_read_pagelist(struct nfs_pgio_data *rdata) extent_length -= PAGE_CACHE_SECTORS; } if ((isect << SECTOR_SHIFT) >= header->inode->i_size) { - rdata->res.eof = 1; - rdata->res.count = header->inode->i_size - rdata->args.offset; + hdr->res.eof = 1; + hdr->res.count = header->inode->i_size - hdr->args.offset; } else { - rdata->res.count = (isect << SECTOR_SHIFT) - rdata->args.offset; + hdr->res.count = (isect << SECTOR_SHIFT) - hdr->args.offset; } out: bl_put_extent(be); @@ -390,8 +391,7 @@ static void bl_end_io_write_zero(struct bio *bio, int err) } if (unlikely(err)) { - struct nfs_pgio_data *data = par->data; - struct nfs_pgio_header *header = data->header; + struct nfs_pgio_header *header = par->data; if (!header->pnfs_error) header->pnfs_error = -EIO; @@ -405,8 +405,7 @@ 
static void bl_end_io_write(struct bio *bio, int err) { struct parallel_io *par = bio->bi_private; const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct nfs_pgio_data *data = par->data; - struct nfs_pgio_header *header = data->header; + struct nfs_pgio_header *header = par->data; if (!uptodate) { if (!header->pnfs_error) @@ -423,32 +422,32 @@ static void bl_end_io_write(struct bio *bio, int err) static void bl_write_cleanup(struct work_struct *work) { struct rpc_task *task; - struct nfs_pgio_data *wdata; + struct nfs_pgio_header *hdr; dprintk("%s enter\n", __func__); task = container_of(work, struct rpc_task, u.tk_work); - wdata = container_of(task, struct nfs_pgio_data, task); - if (likely(!wdata->header->pnfs_error)) { + hdr = container_of(task, struct nfs_pgio_header, task); + if (likely(!hdr->pnfs_error)) { /* Marks for LAYOUTCOMMIT */ - mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg), - wdata->args.offset, wdata->args.count); + mark_extents_written(BLK_LSEG2EXT(hdr->lseg), + hdr->args.offset, hdr->args.count); } - pnfs_ld_write_done(wdata); + pnfs_ld_write_done(hdr); } /* Called when last of bios associated with a bl_write_pagelist call finishes */ static void bl_end_par_io_write(void *data, int num_se) { - struct nfs_pgio_data *wdata = data; + struct nfs_pgio_header *hdr = data; - if (unlikely(wdata->header->pnfs_error)) { - bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval, + if (unlikely(hdr->pnfs_error)) { + bl_free_short_extents(&BLK_LSEG2EXT(hdr->lseg)->bl_inval, num_se); } - wdata->task.tk_status = wdata->header->pnfs_error; - wdata->verf.committed = NFS_FILE_SYNC; - INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup); - schedule_work(&wdata->task.u.tk_work); + hdr->task.tk_status = hdr->pnfs_error; + hdr->verf.committed = NFS_FILE_SYNC; + INIT_WORK(&hdr->task.u.tk_work, bl_write_cleanup); + schedule_work(&hdr->task.u.tk_work); } /* FIXME STUB - mark intersection of layout and page as bad, so is not @@ -673,18 
+672,17 @@ check_page: } static enum pnfs_try_status -bl_write_pagelist(struct nfs_pgio_data *wdata, int sync) +bl_write_pagelist(struct nfs_pgio_header *header, int sync) { - struct nfs_pgio_header *header = wdata->header; int i, ret, npg_zero, pg_index, last = 0; struct bio *bio = NULL; struct pnfs_block_extent *be = NULL, *cow_read = NULL; sector_t isect, last_isect = 0, extent_length = 0; struct parallel_io *par = NULL; - loff_t offset = wdata->args.offset; - size_t count = wdata->args.count; + loff_t offset = header->args.offset; + size_t count = header->args.count; unsigned int pg_offset, pg_len, saved_len; - struct page **pages = wdata->args.pages; + struct page **pages = header->args.pages; struct page *page; pgoff_t index; u64 temp; @@ -699,11 +697,11 @@ bl_write_pagelist(struct nfs_pgio_data *wdata, int sync) dprintk("pnfsblock nonblock aligned DIO writes. Resend MDS\n"); goto out_mds; } - /* At this point, wdata->pages is a (sequential) list of nfs_pages. + /* At this point, header->page_array is a (sequential) list of nfs_pages. * We want to write each, and if there is an error set pnfs_error * to have it redone using nfs. 
*/ - par = alloc_parallel(wdata); + par = alloc_parallel(header); if (!par) goto out_mds; par->pnfs_callback = bl_end_par_io_write; @@ -790,8 +788,8 @@ next_page: bio = bl_submit_bio(WRITE, bio); /* Middle pages */ - pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT; - for (i = pg_index; i < wdata->pages.npages; i++) { + pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT; + for (i = pg_index; i < header->page_array.npages; i++) { if (!extent_length) { /* We've used up the previous extent */ bl_put_extent(be); @@ -862,7 +860,8 @@ next_page: } - bio = do_add_page_to_bio(bio, wdata->pages.npages - i, WRITE, + bio = do_add_page_to_bio(bio, header->page_array.npages - i, + WRITE, isect, pages[i], be, bl_end_io_write, par, pg_offset, pg_len); @@ -890,7 +889,7 @@ next_page: } write_done: - wdata->res.count = wdata->args.count; + header->res.count = header->args.count; out: bl_put_extent(be); bl_put_extent(cow_read); @@ -1063,7 +1062,7 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh, return ERR_PTR(-ENOMEM); } - pages = kzalloc(max_pages * sizeof(struct page *), GFP_NOFS); + pages = kcalloc(max_pages, sizeof(struct page *), GFP_NOFS); if (pages == NULL) { kfree(dev); return ERR_PTR(-ENOMEM); diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 073b4cf67ed9..54de482143cc 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -428,6 +428,18 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp) if (p == NULL) return 0; + /* + * Did we get the acceptor from userland during the SETCLIENTID + * negotiation? + */ + if (clp->cl_acceptor) + return !strcmp(p, clp->cl_acceptor); + + /* + * Otherwise try to verify it using the cl_hostname. Note that this + * doesn't work if a non-canonical hostname was used in the devname. 
+ */ + /* Expect a GSS_C_NT_HOSTBASED_NAME like "nfs@serverhostname" */ if (memcmp(p, "nfs@", 4) != 0) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 1d09289c8f0e..6a4f3666e273 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -110,8 +110,8 @@ struct nfs_subversion *get_nfs_version(unsigned int version) mutex_unlock(&nfs_version_mutex); } - if (!IS_ERR(nfs)) - try_module_get(nfs->owner); + if (!IS_ERR(nfs) && !try_module_get(nfs->owner)) + return ERR_PTR(-EAGAIN); return nfs; } @@ -158,7 +158,8 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) goto error_0; clp->cl_nfs_mod = cl_init->nfs_mod; - try_module_get(clp->cl_nfs_mod->owner); + if (!try_module_get(clp->cl_nfs_mod->owner)) + goto error_dealloc; clp->rpc_ops = clp->cl_nfs_mod->rpc_ops; @@ -190,6 +191,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) error_cleanup: put_nfs_version(clp->cl_nfs_mod); +error_dealloc: kfree(clp); error_0: return ERR_PTR(err); @@ -252,6 +254,7 @@ void nfs_free_client(struct nfs_client *clp) put_net(clp->cl_net); put_nfs_version(clp->cl_nfs_mod); kfree(clp->cl_hostname); + kfree(clp->cl_acceptor); kfree(clp); dprintk("<-- nfs_free_client()\n"); @@ -482,8 +485,13 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id); const struct nfs_rpc_ops *rpc_ops = cl_init->nfs_mod->rpc_ops; + if (cl_init->hostname == NULL) { + WARN_ON(1); + return NULL; + } + dprintk("--> nfs_get_client(%s,v%u)\n", - cl_init->hostname ?: "", rpc_ops->version); + cl_init->hostname, rpc_ops->version); /* see if the client already exists */ do { @@ -510,7 +518,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, } while (!IS_ERR(new)); dprintk("<-- nfs_get_client() Failed to find %s (%ld)\n", - cl_init->hostname ?: "", PTR_ERR(new)); + cl_init->hostname, PTR_ERR(new)); return new; } EXPORT_SYMBOL_GPL(nfs_get_client); @@ -1205,7 +1213,7 @@ static const struct 
file_operations nfs_server_list_fops = { .open = nfs_server_list_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_net, .owner = THIS_MODULE, }; @@ -1226,7 +1234,7 @@ static const struct file_operations nfs_volume_list_fops = { .open = nfs_volume_list_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_net, .owner = THIS_MODULE, }; @@ -1236,19 +1244,8 @@ static const struct file_operations nfs_volume_list_fops = { */ static int nfs_server_list_open(struct inode *inode, struct file *file) { - struct seq_file *m; - int ret; - struct pid_namespace *pid_ns = file->f_dentry->d_sb->s_fs_info; - struct net *net = pid_ns->child_reaper->nsproxy->net_ns; - - ret = seq_open(file, &nfs_server_list_ops); - if (ret < 0) - return ret; - - m = file->private_data; - m->private = net; - - return 0; + return seq_open_net(inode, file, &nfs_server_list_ops, + sizeof(struct seq_net_private)); } /* @@ -1256,7 +1253,7 @@ static int nfs_server_list_open(struct inode *inode, struct file *file) */ static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) { - struct nfs_net *nn = net_generic(m->private, nfs_net_id); + struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id); /* lock the list against modification */ spin_lock(&nn->nfs_client_lock); @@ -1268,7 +1265,7 @@ static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) */ static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos) { - struct nfs_net *nn = net_generic(p->private, nfs_net_id); + struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id); return seq_list_next(v, &nn->nfs_client_list, pos); } @@ -1278,7 +1275,7 @@ static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos) */ static void nfs_server_list_stop(struct seq_file *p, void *v) { - struct nfs_net *nn = net_generic(p->private, nfs_net_id); + struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id); 
spin_unlock(&nn->nfs_client_lock); } @@ -1289,7 +1286,7 @@ static void nfs_server_list_stop(struct seq_file *p, void *v) static int nfs_server_list_show(struct seq_file *m, void *v) { struct nfs_client *clp; - struct nfs_net *nn = net_generic(m->private, nfs_net_id); + struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id); /* display header on line 1 */ if (v == &nn->nfs_client_list) { @@ -1321,19 +1318,8 @@ static int nfs_server_list_show(struct seq_file *m, void *v) */ static int nfs_volume_list_open(struct inode *inode, struct file *file) { - struct seq_file *m; - int ret; - struct pid_namespace *pid_ns = file->f_dentry->d_sb->s_fs_info; - struct net *net = pid_ns->child_reaper->nsproxy->net_ns; - - ret = seq_open(file, &nfs_volume_list_ops); - if (ret < 0) - return ret; - - m = file->private_data; - m->private = net; - - return 0; + return seq_open_net(inode, file, &nfs_server_list_ops, + sizeof(struct seq_net_private)); } /* @@ -1341,7 +1327,7 @@ static int nfs_volume_list_open(struct inode *inode, struct file *file) */ static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) { - struct nfs_net *nn = net_generic(m->private, nfs_net_id); + struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id); /* lock the list against modification */ spin_lock(&nn->nfs_client_lock); @@ -1353,7 +1339,7 @@ static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) */ static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos) { - struct nfs_net *nn = net_generic(p->private, nfs_net_id); + struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id); return seq_list_next(v, &nn->nfs_volume_list, pos); } @@ -1363,7 +1349,7 @@ static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos) */ static void nfs_volume_list_stop(struct seq_file *p, void *v) { - struct nfs_net *nn = net_generic(p->private, nfs_net_id); + struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id); spin_unlock(&nn->nfs_client_lock); } @@ 
-1376,7 +1362,7 @@ static int nfs_volume_list_show(struct seq_file *m, void *v) struct nfs_server *server; struct nfs_client *clp; char dev[8], fsid[17]; - struct nfs_net *nn = net_generic(m->private, nfs_net_id); + struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id); /* display header on line 1 */ if (v == &nn->nfs_volume_list) { @@ -1407,6 +1393,39 @@ static int nfs_volume_list_show(struct seq_file *m, void *v) return 0; } +int nfs_fs_proc_net_init(struct net *net) +{ + struct nfs_net *nn = net_generic(net, nfs_net_id); + struct proc_dir_entry *p; + + nn->proc_nfsfs = proc_net_mkdir(net, "nfsfs", net->proc_net); + if (!nn->proc_nfsfs) + goto error_0; + + /* a file of servers with which we're dealing */ + p = proc_create("servers", S_IFREG|S_IRUGO, + nn->proc_nfsfs, &nfs_server_list_fops); + if (!p) + goto error_1; + + /* a file of volumes that we have mounted */ + p = proc_create("volumes", S_IFREG|S_IRUGO, + nn->proc_nfsfs, &nfs_volume_list_fops); + if (!p) + goto error_1; + return 0; + +error_1: + remove_proc_subtree("nfsfs", net->proc_net); +error_0: + return -ENOMEM; +} + +void nfs_fs_proc_net_exit(struct net *net) +{ + remove_proc_subtree("nfsfs", net->proc_net); +} + /* * initialise the /proc/fs/nfsfs/ directory */ @@ -1419,14 +1438,12 @@ int __init nfs_fs_proc_init(void) goto error_0; /* a file of servers with which we're dealing */ - p = proc_create("servers", S_IFREG|S_IRUGO, - proc_fs_nfs, &nfs_server_list_fops); + p = proc_symlink("servers", proc_fs_nfs, "../../net/nfsfs/servers"); if (!p) goto error_1; /* a file of volumes that we have mounted */ - p = proc_create("volumes", S_IFREG|S_IRUGO, - proc_fs_nfs, &nfs_volume_list_fops); + p = proc_symlink("volumes", proc_fs_nfs, "../../net/nfsfs/volumes"); if (!p) goto error_2; return 0; diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 5d8ccecf5f5c..5853f53db732 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -41,14 +41,8 @@ void nfs_mark_delegation_referenced(struct 
nfs_delegation *delegation) set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags); } -/** - * nfs_have_delegation - check if inode has a delegation - * @inode: inode to check - * @flags: delegation types to check for - * - * Returns one if inode has the indicated delegation, otherwise zero. - */ -int nfs4_have_delegation(struct inode *inode, fmode_t flags) +static int +nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark) { struct nfs_delegation *delegation; int ret = 0; @@ -58,12 +52,34 @@ int nfs4_have_delegation(struct inode *inode, fmode_t flags) delegation = rcu_dereference(NFS_I(inode)->delegation); if (delegation != NULL && (delegation->type & flags) == flags && !test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) { - nfs_mark_delegation_referenced(delegation); + if (mark) + nfs_mark_delegation_referenced(delegation); ret = 1; } rcu_read_unlock(); return ret; } +/** + * nfs4_have_delegation - check if inode has a delegation, mark it + * NFS_DELEGATION_REFERENCED if there is one. + * @inode: inode to check + * @flags: delegation types to check for + * + * Returns one if inode has the indicated delegation, otherwise zero. + */ +int nfs4_have_delegation(struct inode *inode, fmode_t flags) +{ + return nfs4_do_check_delegation(inode, flags, true); +} + +/* + * nfs4_check_delegation - check if inode has a delegation, do not mark + * NFS_DELEGATION_REFERENCED if it has one. 
+ */ +int nfs4_check_delegation(struct inode *inode, fmode_t flags) +{ + return nfs4_do_check_delegation(inode, flags, false); +} static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) { diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 9a79c7a99d6d..5c1cce39297f 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -59,6 +59,7 @@ bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_ void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); int nfs4_have_delegation(struct inode *inode, fmode_t flags); +int nfs4_check_delegation(struct inode *inode, fmode_t flags); #endif diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 4a3d4ef76127..36d921f0c602 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -988,9 +988,13 @@ EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate); * A check for whether or not the parent directory has changed. * In the case it has, we assume that the dentries are untrustworthy * and may need to be looked up again. + * If rcu_walk prevents us from performing a full check, return 0. 
*/ -static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) +static int nfs_check_verifier(struct inode *dir, struct dentry *dentry, + int rcu_walk) { + int ret; + if (IS_ROOT(dentry)) return 1; if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE) @@ -998,7 +1002,11 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) if (!nfs_verify_change_attribute(dir, dentry->d_time)) return 0; /* Revalidate nfsi->cache_change_attribute before we declare a match */ - if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0) + if (rcu_walk) + ret = nfs_revalidate_inode_rcu(NFS_SERVER(dir), dir); + else + ret = nfs_revalidate_inode(NFS_SERVER(dir), dir); + if (ret < 0) return 0; if (!nfs_verify_change_attribute(dir, dentry->d_time)) return 0; @@ -1042,6 +1050,8 @@ int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags) out: return (inode->i_nlink == 0) ? -ENOENT : 0; out_force: + if (flags & LOOKUP_RCU) + return -ECHILD; ret = __nfs_revalidate_inode(server, inode); if (ret != 0) return ret; @@ -1054,6 +1064,9 @@ out_force: * * If parent mtime has changed, we revalidate, else we wait for a * period corresponding to the parent's attribute cache timeout value. + * + * If LOOKUP_RCU prevents us from performing a full check, return 1 + * suggesting a reval is needed. 
*/ static inline int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, @@ -1064,7 +1077,7 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, return 0; if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) return 1; - return !nfs_check_verifier(dir, dentry); + return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU); } /* @@ -1088,21 +1101,30 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) struct nfs4_label *label = NULL; int error; - if (flags & LOOKUP_RCU) - return -ECHILD; - - parent = dget_parent(dentry); - dir = parent->d_inode; + if (flags & LOOKUP_RCU) { + parent = ACCESS_ONCE(dentry->d_parent); + dir = ACCESS_ONCE(parent->d_inode); + if (!dir) + return -ECHILD; + } else { + parent = dget_parent(dentry); + dir = parent->d_inode; + } nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); inode = dentry->d_inode; if (!inode) { - if (nfs_neg_need_reval(dir, dentry, flags)) + if (nfs_neg_need_reval(dir, dentry, flags)) { + if (flags & LOOKUP_RCU) + return -ECHILD; goto out_bad; + } goto out_valid_noent; } if (is_bad_inode(inode)) { + if (flags & LOOKUP_RCU) + return -ECHILD; dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n", __func__, dentry); goto out_bad; @@ -1112,12 +1134,20 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) goto out_set_verifier; /* Force a full look up iff the parent directory has changed */ - if (!nfs_is_exclusive_create(dir, flags) && nfs_check_verifier(dir, dentry)) { - if (nfs_lookup_verify_inode(inode, flags)) + if (!nfs_is_exclusive_create(dir, flags) && + nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) { + + if (nfs_lookup_verify_inode(inode, flags)) { + if (flags & LOOKUP_RCU) + return -ECHILD; goto out_zap_parent; + } goto out_valid; } + if (flags & LOOKUP_RCU) + return -ECHILD; + if (NFS_STALE(inode)) goto out_bad; @@ -1153,13 +1183,18 @@ out_set_verifier: /* Success: notify readdir to use READDIRPLUS */ nfs_advise_use_readdirplus(dir); 
out_valid_noent: - dput(parent); + if (flags & LOOKUP_RCU) { + if (parent != ACCESS_ONCE(dentry->d_parent)) + return -ECHILD; + } else + dput(parent); dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n", __func__, dentry); return 1; out_zap_parent: nfs_zap_caches(dir); out_bad: + WARN_ON(flags & LOOKUP_RCU); nfs_free_fattr(fattr); nfs_free_fhandle(fhandle); nfs4_label_free(label); @@ -1185,6 +1220,7 @@ out_zap_parent: __func__, dentry); return 0; out_error: + WARN_ON(flags & LOOKUP_RCU); nfs_free_fattr(fattr); nfs_free_fhandle(fhandle); nfs4_label_free(label); @@ -1529,14 +1565,9 @@ EXPORT_SYMBOL_GPL(nfs_atomic_open); static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) { - struct dentry *parent = NULL; struct inode *inode; - struct inode *dir; int ret = 0; - if (flags & LOOKUP_RCU) - return -ECHILD; - if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY)) goto no_open; if (d_mountpoint(dentry)) @@ -1545,34 +1576,47 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) goto no_open; inode = dentry->d_inode; - parent = dget_parent(dentry); - dir = parent->d_inode; /* We can't create new files in nfs_open_revalidate(), so we * optimize away revalidation of negative dentries. 
*/ if (inode == NULL) { + struct dentry *parent; + struct inode *dir; + + if (flags & LOOKUP_RCU) { + parent = ACCESS_ONCE(dentry->d_parent); + dir = ACCESS_ONCE(parent->d_inode); + if (!dir) + return -ECHILD; + } else { + parent = dget_parent(dentry); + dir = parent->d_inode; + } if (!nfs_neg_need_reval(dir, dentry, flags)) ret = 1; + else if (flags & LOOKUP_RCU) + ret = -ECHILD; + if (!(flags & LOOKUP_RCU)) + dput(parent); + else if (parent != ACCESS_ONCE(dentry->d_parent)) + return -ECHILD; goto out; } /* NFS only supports OPEN on regular files */ if (!S_ISREG(inode->i_mode)) - goto no_open_dput; + goto no_open; /* We cannot do exclusive creation on a positive dentry */ if (flags & LOOKUP_EXCL) - goto no_open_dput; + goto no_open; /* Let f_op->open() actually open (and revalidate) the file */ ret = 1; out: - dput(parent); return ret; -no_open_dput: - dput(parent); no_open: return nfs_lookup_revalidate(dentry, flags); } @@ -2028,10 +2072,14 @@ static DEFINE_SPINLOCK(nfs_access_lru_lock); static LIST_HEAD(nfs_access_lru_list); static atomic_long_t nfs_access_nr_entries; +static unsigned long nfs_access_max_cachesize = ULONG_MAX; +module_param(nfs_access_max_cachesize, ulong, 0644); +MODULE_PARM_DESC(nfs_access_max_cachesize, "NFS access maximum total cache length"); + static void nfs_access_free_entry(struct nfs_access_entry *entry) { put_rpccred(entry->cred); - kfree(entry); + kfree_rcu(entry, rcu_head); smp_mb__before_atomic(); atomic_long_dec(&nfs_access_nr_entries); smp_mb__after_atomic(); @@ -2048,19 +2096,14 @@ static void nfs_access_free_list(struct list_head *head) } } -unsigned long -nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc) +static unsigned long +nfs_do_access_cache_scan(unsigned int nr_to_scan) { LIST_HEAD(head); struct nfs_inode *nfsi, *next; struct nfs_access_entry *cache; - int nr_to_scan = sc->nr_to_scan; - gfp_t gfp_mask = sc->gfp_mask; long freed = 0; - if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) - return 
SHRINK_STOP; - spin_lock(&nfs_access_lru_lock); list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) { struct inode *inode; @@ -2094,11 +2137,39 @@ remove_lru_entry: } unsigned long +nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc) +{ + int nr_to_scan = sc->nr_to_scan; + gfp_t gfp_mask = sc->gfp_mask; + + if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) + return SHRINK_STOP; + return nfs_do_access_cache_scan(nr_to_scan); +} + + +unsigned long nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc) { return vfs_pressure_ratio(atomic_long_read(&nfs_access_nr_entries)); } +static void +nfs_access_cache_enforce_limit(void) +{ + long nr_entries = atomic_long_read(&nfs_access_nr_entries); + unsigned long diff; + unsigned int nr_to_scan; + + if (nr_entries < 0 || nr_entries <= nfs_access_max_cachesize) + return; + nr_to_scan = 100; + diff = nr_entries - nfs_access_max_cachesize; + if (diff < nr_to_scan) + nr_to_scan = diff; + nfs_do_access_cache_scan(nr_to_scan); +} + static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head) { struct rb_root *root_node = &nfsi->access_cache; @@ -2186,6 +2257,38 @@ out_zap: return -ENOENT; } +static int nfs_access_get_cached_rcu(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res) +{ + /* Only check the most recently returned cache entry, + * but do it without locking. 
+ */ + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_access_entry *cache; + int err = -ECHILD; + struct list_head *lh; + + rcu_read_lock(); + if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS) + goto out; + lh = rcu_dereference(nfsi->access_cache_entry_lru.prev); + cache = list_entry(lh, struct nfs_access_entry, lru); + if (lh == &nfsi->access_cache_entry_lru || + cred != cache->cred) + cache = NULL; + if (cache == NULL) + goto out; + if (!nfs_have_delegated_attributes(inode) && + !time_in_range_open(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo)) + goto out; + res->jiffies = cache->jiffies; + res->cred = cache->cred; + res->mask = cache->mask; + err = 0; +out: + rcu_read_unlock(); + return err; +} + static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set) { struct nfs_inode *nfsi = NFS_I(inode); @@ -2229,6 +2332,11 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) cache->cred = get_rpccred(set->cred); cache->mask = set->mask; + /* The above field assignments must be visible + * before this item appears on the lru. We cannot easily + * use rcu_assign_pointer, so just force the memory barrier. 
+ */ + smp_wmb(); nfs_access_add_rbtree(inode, cache); /* Update accounting */ @@ -2244,6 +2352,7 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) &nfs_access_lru_list); spin_unlock(&nfs_access_lru_lock); } + nfs_access_cache_enforce_limit(); } EXPORT_SYMBOL_GPL(nfs_access_add_cache); @@ -2267,10 +2376,16 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) trace_nfs_access_enter(inode); - status = nfs_access_get_cached(inode, cred, &cache); + status = nfs_access_get_cached_rcu(inode, cred, &cache); + if (status != 0) + status = nfs_access_get_cached(inode, cred, &cache); if (status == 0) goto out_cached; + status = -ECHILD; + if (mask & MAY_NOT_BLOCK) + goto out; + /* Be clever: ask server to check for all possible rights */ cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ; cache.cred = cred; @@ -2321,9 +2436,6 @@ int nfs_permission(struct inode *inode, int mask) struct rpc_cred *cred; int res = 0; - if (mask & MAY_NOT_BLOCK) - return -ECHILD; - nfs_inc_stats(inode, NFSIOS_VFSACCESS); if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) @@ -2350,12 +2462,23 @@ force_lookup: if (!NFS_PROTO(inode)->access) goto out_notsup; - cred = rpc_lookup_cred(); - if (!IS_ERR(cred)) { - res = nfs_do_access(inode, cred, mask); - put_rpccred(cred); - } else + /* Always try fast lookups first */ + rcu_read_lock(); + cred = rpc_lookup_cred_nonblock(); + if (!IS_ERR(cred)) + res = nfs_do_access(inode, cred, mask|MAY_NOT_BLOCK); + else res = PTR_ERR(cred); + rcu_read_unlock(); + if (res == -ECHILD && !(mask & MAY_NOT_BLOCK)) { + /* Fast lookup failed, try the slow way */ + cred = rpc_lookup_cred(); + if (!IS_ERR(cred)) { + res = nfs_do_access(inode, cred, mask); + put_rpccred(cred); + } else + res = PTR_ERR(cred); + } out: if (!res && (mask & MAY_EXEC) && !execute_ok(inode)) res = -EACCES; @@ -2364,6 +2487,9 @@ out: inode->i_sb->s_id, inode->i_ino, mask, res); return res; out_notsup: + if (mask & MAY_NOT_BLOCK) + return -ECHILD; 
+ res = nfs_revalidate_inode(NFS_SERVER(inode), inode); if (res == 0) res = generic_permission(inode, mask); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index f11b9eed0de1..65ef6e00deee 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -148,8 +148,8 @@ static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq, { struct nfs_writeverf *verfp; - verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp, - hdr->data->ds_idx); + verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, + hdr->ds_idx); WARN_ON_ONCE(verfp->committed >= 0); memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); WARN_ON_ONCE(verfp->committed < 0); @@ -169,8 +169,8 @@ static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq, { struct nfs_writeverf *verfp; - verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp, - hdr->data->ds_idx); + verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, + hdr->ds_idx); if (verfp->committed < 0) { nfs_direct_set_hdr_verf(dreq, hdr); return 0; @@ -715,7 +715,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) { struct nfs_direct_req *dreq = hdr->dreq; struct nfs_commit_info cinfo; - int bit = -1; + bool request_commit = false; struct nfs_page *req = nfs_list_entry(hdr->pages.next); if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) @@ -729,27 +729,20 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) dreq->flags = 0; dreq->error = hdr->error; } - if (dreq->error != 0) - bit = NFS_IOHDR_ERROR; - else { + if (dreq->error == 0) { dreq->count += hdr->good_bytes; - if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) { - dreq->flags = NFS_ODIRECT_RESCHED_WRITES; - bit = NFS_IOHDR_NEED_RESCHED; - } else if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) { + if (nfs_write_need_commit(hdr)) { if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) - bit = NFS_IOHDR_NEED_RESCHED; + request_commit = true; else if (dreq->flags == 0) { nfs_direct_set_hdr_verf(dreq, hdr); - bit = NFS_IOHDR_NEED_COMMIT; + request_commit = true; 
dreq->flags = NFS_ODIRECT_DO_COMMIT; } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) { - if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr)) { + request_commit = true; + if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr)) dreq->flags = NFS_ODIRECT_RESCHED_WRITES; - bit = NFS_IOHDR_NEED_RESCHED; - } else - bit = NFS_IOHDR_NEED_COMMIT; } } } @@ -759,9 +752,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) req = nfs_list_entry(hdr->pages.next); nfs_list_remove_request(req); - switch (bit) { - case NFS_IOHDR_NEED_RESCHED: - case NFS_IOHDR_NEED_COMMIT: + if (request_commit) { kref_get(&req->wb_kref); nfs_mark_request_commit(req, hdr->lseg, &cinfo); } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 4042ff58fe3f..524dd80d1898 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -361,8 +361,8 @@ start: * Prevent starvation issues if someone is doing a consistency * sync-to-disk */ - ret = wait_on_bit(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING, - nfs_wait_bit_killable, TASK_KILLABLE); + ret = wait_on_bit_action(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING, + nfs_wait_bit_killable, TASK_KILLABLE); if (ret) return ret; diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index d2eba1c13b7e..90978075f730 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -84,45 +84,37 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset) BUG(); } -static void filelayout_reset_write(struct nfs_pgio_data *data) +static void filelayout_reset_write(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - struct rpc_task *task = &data->task; + struct rpc_task *task = &hdr->task; if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { dprintk("%s Reset task %5u for i/o through MDS " "(req %s/%llu, %u bytes @ offset %llu)\n", __func__, - data->task.tk_pid, + hdr->task.tk_pid, hdr->inode->i_sb->s_id, (unsigned long long)NFS_FILEID(hdr->inode), - data->args.count, - (unsigned long 
long)data->args.offset); + hdr->args.count, + (unsigned long long)hdr->args.offset); - task->tk_status = pnfs_write_done_resend_to_mds(hdr->inode, - &hdr->pages, - hdr->completion_ops, - hdr->dreq); + task->tk_status = pnfs_write_done_resend_to_mds(hdr); } } -static void filelayout_reset_read(struct nfs_pgio_data *data) +static void filelayout_reset_read(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - struct rpc_task *task = &data->task; + struct rpc_task *task = &hdr->task; if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { dprintk("%s Reset task %5u for i/o through MDS " "(req %s/%llu, %u bytes @ offset %llu)\n", __func__, - data->task.tk_pid, + hdr->task.tk_pid, hdr->inode->i_sb->s_id, (unsigned long long)NFS_FILEID(hdr->inode), - data->args.count, - (unsigned long long)data->args.offset); + hdr->args.count, + (unsigned long long)hdr->args.offset); - task->tk_status = pnfs_read_done_resend_to_mds(hdr->inode, - &hdr->pages, - hdr->completion_ops, - hdr->dreq); + task->tk_status = pnfs_read_done_resend_to_mds(hdr); } } @@ -243,18 +235,17 @@ wait_on_recovery: /* NFS_PROTO call done callback routines */ static int filelayout_read_done_cb(struct rpc_task *task, - struct nfs_pgio_data *data) + struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; int err; - trace_nfs4_pnfs_read(data, task->tk_status); - err = filelayout_async_handle_error(task, data->args.context->state, - data->ds_clp, hdr->lseg); + trace_nfs4_pnfs_read(hdr, task->tk_status); + err = filelayout_async_handle_error(task, hdr->args.context->state, + hdr->ds_clp, hdr->lseg); switch (err) { case -NFS4ERR_RESET_TO_MDS: - filelayout_reset_read(data); + filelayout_reset_read(hdr); return task->tk_status; case -EAGAIN: rpc_restart_call_prepare(task); @@ -270,15 +261,14 @@ static int filelayout_read_done_cb(struct rpc_task *task, * rfc5661 is not clear about which credential should be used. 
*/ static void -filelayout_set_layoutcommit(struct nfs_pgio_data *wdata) +filelayout_set_layoutcommit(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = wdata->header; if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds || - wdata->res.verf->committed == NFS_FILE_SYNC) + hdr->res.verf->committed == NFS_FILE_SYNC) return; - pnfs_set_layoutcommit(wdata); + pnfs_set_layoutcommit(hdr); dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); } @@ -305,83 +295,82 @@ filelayout_reset_to_mds(struct pnfs_layout_segment *lseg) */ static void filelayout_read_prepare(struct rpc_task *task, void *data) { - struct nfs_pgio_data *rdata = data; + struct nfs_pgio_header *hdr = data; - if (unlikely(test_bit(NFS_CONTEXT_BAD, &rdata->args.context->flags))) { + if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { rpc_exit(task, -EIO); return; } - if (filelayout_reset_to_mds(rdata->header->lseg)) { + if (filelayout_reset_to_mds(hdr->lseg)) { dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); - filelayout_reset_read(rdata); + filelayout_reset_read(hdr); rpc_exit(task, 0); return; } - rdata->pgio_done_cb = filelayout_read_done_cb; + hdr->pgio_done_cb = filelayout_read_done_cb; - if (nfs41_setup_sequence(rdata->ds_clp->cl_session, - &rdata->args.seq_args, - &rdata->res.seq_res, + if (nfs41_setup_sequence(hdr->ds_clp->cl_session, + &hdr->args.seq_args, + &hdr->res.seq_res, task)) return; - if (nfs4_set_rw_stateid(&rdata->args.stateid, rdata->args.context, - rdata->args.lock_context, FMODE_READ) == -EIO) + if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, + hdr->args.lock_context, FMODE_READ) == -EIO) rpc_exit(task, -EIO); /* lost lock, terminate I/O */ } static void filelayout_read_call_done(struct rpc_task *task, void *data) { - struct nfs_pgio_data *rdata = data; + struct nfs_pgio_header *hdr = data; dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); - 
if (test_bit(NFS_IOHDR_REDO, &rdata->header->flags) && + if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && task->tk_status == 0) { - nfs41_sequence_done(task, &rdata->res.seq_res); + nfs41_sequence_done(task, &hdr->res.seq_res); return; } /* Note this may cause RPC to be resent */ - rdata->header->mds_ops->rpc_call_done(task, data); + hdr->mds_ops->rpc_call_done(task, data); } static void filelayout_read_count_stats(struct rpc_task *task, void *data) { - struct nfs_pgio_data *rdata = data; + struct nfs_pgio_header *hdr = data; - rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics); + rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics); } static void filelayout_read_release(void *data) { - struct nfs_pgio_data *rdata = data; - struct pnfs_layout_hdr *lo = rdata->header->lseg->pls_layout; + struct nfs_pgio_header *hdr = data; + struct pnfs_layout_hdr *lo = hdr->lseg->pls_layout; filelayout_fenceme(lo->plh_inode, lo); - nfs_put_client(rdata->ds_clp); - rdata->header->mds_ops->rpc_release(data); + nfs_put_client(hdr->ds_clp); + hdr->mds_ops->rpc_release(data); } static int filelayout_write_done_cb(struct rpc_task *task, - struct nfs_pgio_data *data) + struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; int err; - trace_nfs4_pnfs_write(data, task->tk_status); - err = filelayout_async_handle_error(task, data->args.context->state, - data->ds_clp, hdr->lseg); + trace_nfs4_pnfs_write(hdr, task->tk_status); + err = filelayout_async_handle_error(task, hdr->args.context->state, + hdr->ds_clp, hdr->lseg); switch (err) { case -NFS4ERR_RESET_TO_MDS: - filelayout_reset_write(data); + filelayout_reset_write(hdr); return task->tk_status; case -EAGAIN: rpc_restart_call_prepare(task); return -EAGAIN; } - filelayout_set_layoutcommit(data); + filelayout_set_layoutcommit(hdr); return 0; } @@ -419,57 +408,57 @@ static int filelayout_commit_done_cb(struct rpc_task *task, static void filelayout_write_prepare(struct rpc_task *task, 
void *data) { - struct nfs_pgio_data *wdata = data; + struct nfs_pgio_header *hdr = data; - if (unlikely(test_bit(NFS_CONTEXT_BAD, &wdata->args.context->flags))) { + if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { rpc_exit(task, -EIO); return; } - if (filelayout_reset_to_mds(wdata->header->lseg)) { + if (filelayout_reset_to_mds(hdr->lseg)) { dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); - filelayout_reset_write(wdata); + filelayout_reset_write(hdr); rpc_exit(task, 0); return; } - if (nfs41_setup_sequence(wdata->ds_clp->cl_session, - &wdata->args.seq_args, - &wdata->res.seq_res, + if (nfs41_setup_sequence(hdr->ds_clp->cl_session, + &hdr->args.seq_args, + &hdr->res.seq_res, task)) return; - if (nfs4_set_rw_stateid(&wdata->args.stateid, wdata->args.context, - wdata->args.lock_context, FMODE_WRITE) == -EIO) + if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, + hdr->args.lock_context, FMODE_WRITE) == -EIO) rpc_exit(task, -EIO); /* lost lock, terminate I/O */ } static void filelayout_write_call_done(struct rpc_task *task, void *data) { - struct nfs_pgio_data *wdata = data; + struct nfs_pgio_header *hdr = data; - if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) && + if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && task->tk_status == 0) { - nfs41_sequence_done(task, &wdata->res.seq_res); + nfs41_sequence_done(task, &hdr->res.seq_res); return; } /* Note this may cause RPC to be resent */ - wdata->header->mds_ops->rpc_call_done(task, data); + hdr->mds_ops->rpc_call_done(task, data); } static void filelayout_write_count_stats(struct rpc_task *task, void *data) { - struct nfs_pgio_data *wdata = data; + struct nfs_pgio_header *hdr = data; - rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics); + rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics); } static void filelayout_write_release(void *data) { - struct nfs_pgio_data *wdata = data; - struct pnfs_layout_hdr *lo = 
wdata->header->lseg->pls_layout; + struct nfs_pgio_header *hdr = data; + struct pnfs_layout_hdr *lo = hdr->lseg->pls_layout; filelayout_fenceme(lo->plh_inode, lo); - nfs_put_client(wdata->ds_clp); - wdata->header->mds_ops->rpc_release(data); + nfs_put_client(hdr->ds_clp); + hdr->mds_ops->rpc_release(data); } static void filelayout_commit_prepare(struct rpc_task *task, void *data) @@ -529,19 +518,18 @@ static const struct rpc_call_ops filelayout_commit_call_ops = { }; static enum pnfs_try_status -filelayout_read_pagelist(struct nfs_pgio_data *data) +filelayout_read_pagelist(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; struct pnfs_layout_segment *lseg = hdr->lseg; struct nfs4_pnfs_ds *ds; struct rpc_clnt *ds_clnt; - loff_t offset = data->args.offset; + loff_t offset = hdr->args.offset; u32 j, idx; struct nfs_fh *fh; dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n", __func__, hdr->inode->i_ino, - data->args.pgbase, (size_t)data->args.count, offset); + hdr->args.pgbase, (size_t)hdr->args.count, offset); /* Retrieve the correct rpc_client for the byte range */ j = nfs4_fl_calc_j_index(lseg, offset); @@ -559,30 +547,29 @@ filelayout_read_pagelist(struct nfs_pgio_data *data) /* No multipath support. Use first DS */ atomic_inc(&ds->ds_clp->cl_count); - data->ds_clp = ds->ds_clp; - data->ds_idx = idx; + hdr->ds_clp = ds->ds_clp; + hdr->ds_idx = idx; fh = nfs4_fl_select_ds_fh(lseg, j); if (fh) - data->args.fh = fh; + hdr->args.fh = fh; - data->args.offset = filelayout_get_dserver_offset(lseg, offset); - data->mds_offset = offset; + hdr->args.offset = filelayout_get_dserver_offset(lseg, offset); + hdr->mds_offset = offset; /* Perform an asynchronous read to ds */ - nfs_initiate_pgio(ds_clnt, data, + nfs_initiate_pgio(ds_clnt, hdr, &filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN); return PNFS_ATTEMPTED; } /* Perform async writes. 
*/ static enum pnfs_try_status -filelayout_write_pagelist(struct nfs_pgio_data *data, int sync) +filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync) { - struct nfs_pgio_header *hdr = data->header; struct pnfs_layout_segment *lseg = hdr->lseg; struct nfs4_pnfs_ds *ds; struct rpc_clnt *ds_clnt; - loff_t offset = data->args.offset; + loff_t offset = hdr->args.offset; u32 j, idx; struct nfs_fh *fh; @@ -598,21 +585,20 @@ filelayout_write_pagelist(struct nfs_pgio_data *data, int sync) return PNFS_NOT_ATTEMPTED; dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n", - __func__, hdr->inode->i_ino, sync, (size_t) data->args.count, + __func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count, offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count)); - data->pgio_done_cb = filelayout_write_done_cb; + hdr->pgio_done_cb = filelayout_write_done_cb; atomic_inc(&ds->ds_clp->cl_count); - data->ds_clp = ds->ds_clp; - data->ds_idx = idx; + hdr->ds_clp = ds->ds_clp; + hdr->ds_idx = idx; fh = nfs4_fl_select_ds_fh(lseg, j); if (fh) - data->args.fh = fh; - - data->args.offset = filelayout_get_dserver_offset(lseg, offset); + hdr->args.fh = fh; + hdr->args.offset = filelayout_get_dserver_offset(lseg, offset); /* Perform an asynchronous write */ - nfs_initiate_pgio(ds_clnt, data, + nfs_initiate_pgio(ds_clnt, hdr, &filelayout_write_call_ops, sync, RPC_TASK_SOFTCONN); return PNFS_ATTEMPTED; @@ -1023,6 +1009,7 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) /* The generic layer is about to remove the req from the commit list. * If this will make the bucket empty, it will need to put the lseg reference. 
+ * Note this is must be called holding the inode (/cinfo) lock */ static void filelayout_clear_request_commit(struct nfs_page *req, @@ -1030,7 +1017,6 @@ filelayout_clear_request_commit(struct nfs_page *req, { struct pnfs_layout_segment *freeme = NULL; - spin_lock(cinfo->lock); if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags)) goto out; cinfo->ds->nwritten--; @@ -1045,22 +1031,25 @@ filelayout_clear_request_commit(struct nfs_page *req, } out: nfs_request_remove_commit_list(req, cinfo); - spin_unlock(cinfo->lock); - pnfs_put_lseg(freeme); + pnfs_put_lseg_async(freeme); } -static struct list_head * -filelayout_choose_commit_list(struct nfs_page *req, - struct pnfs_layout_segment *lseg, - struct nfs_commit_info *cinfo) +static void +filelayout_mark_request_commit(struct nfs_page *req, + struct pnfs_layout_segment *lseg, + struct nfs_commit_info *cinfo) + { struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); u32 i, j; struct list_head *list; struct pnfs_commit_bucket *buckets; - if (fl->commit_through_mds) - return &cinfo->mds->list; + if (fl->commit_through_mds) { + list = &cinfo->mds->list; + spin_lock(cinfo->lock); + goto mds_commit; + } /* Note that we are calling nfs4_fl_calc_j_index on each page * that ends up being committed to a data server. An attractive @@ -1084,19 +1073,22 @@ filelayout_choose_commit_list(struct nfs_page *req, } set_bit(PG_COMMIT_TO_DS, &req->wb_flags); cinfo->ds->nwritten++; - spin_unlock(cinfo->lock); - return list; -} -static void -filelayout_mark_request_commit(struct nfs_page *req, - struct pnfs_layout_segment *lseg, - struct nfs_commit_info *cinfo) -{ - struct list_head *list; - - list = filelayout_choose_commit_list(req, lseg, cinfo); - nfs_request_add_commit_list(req, list, cinfo); +mds_commit: + /* nfs_request_add_commit_list(). We need to add req to list without + * dropping cinfo lock. 
+ */ + set_bit(PG_CLEAN, &(req)->wb_flags); + nfs_list_add_request(req, list); + cinfo->mds->ncommit++; + spin_unlock(cinfo->lock); + if (!cinfo->dreq) { + inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); + inc_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info, + BDI_RECLAIMABLE); + __mark_inode_dirty(req->wb_context->dentry->d_inode, + I_DIRTY_DATASYNC); + } } static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) @@ -1244,15 +1236,64 @@ restart: spin_unlock(cinfo->lock); } +/* filelayout_search_commit_reqs - Search lists in @cinfo for the head reqest + * for @page + * @cinfo - commit info for current inode + * @page - page to search for matching head request + * + * Returns a the head request if one is found, otherwise returns NULL. + */ +static struct nfs_page * +filelayout_search_commit_reqs(struct nfs_commit_info *cinfo, struct page *page) +{ + struct nfs_page *freq, *t; + struct pnfs_commit_bucket *b; + int i; + + /* Linearly search the commit lists for each bucket until a matching + * request is found */ + for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { + list_for_each_entry_safe(freq, t, &b->written, wb_list) { + if (freq->wb_page == page) + return freq->wb_head; + } + list_for_each_entry_safe(freq, t, &b->committing, wb_list) { + if (freq->wb_page == page) + return freq->wb_head; + } + } + + return NULL; +} + +static void filelayout_retry_commit(struct nfs_commit_info *cinfo, int idx) +{ + struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; + struct pnfs_commit_bucket *bucket; + struct pnfs_layout_segment *freeme; + int i; + + for (i = idx; i < fl_cinfo->nbuckets; i++) { + bucket = &fl_cinfo->buckets[i]; + if (list_empty(&bucket->committing)) + continue; + nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo); + spin_lock(cinfo->lock); + freeme = bucket->clseg; + bucket->clseg = NULL; + spin_unlock(cinfo->lock); + pnfs_put_lseg(freeme); + } +} + static unsigned int alloc_ds_commits(struct 
nfs_commit_info *cinfo, struct list_head *list) { struct pnfs_ds_commit_info *fl_cinfo; struct pnfs_commit_bucket *bucket; struct nfs_commit_data *data; - int i, j; + int i; unsigned int nreq = 0; - struct pnfs_layout_segment *freeme; fl_cinfo = cinfo->ds; bucket = fl_cinfo->buckets; @@ -1272,16 +1313,7 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list) } /* Clean up on error */ - for (j = i; j < fl_cinfo->nbuckets; j++, bucket++) { - if (list_empty(&bucket->committing)) - continue; - nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo); - spin_lock(cinfo->lock); - freeme = bucket->clseg; - bucket->clseg = NULL; - spin_unlock(cinfo->lock); - pnfs_put_lseg(freeme); - } + filelayout_retry_commit(cinfo, i); /* Caller will clean up entries put on list */ return nreq; } @@ -1301,8 +1333,12 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, data->lseg = NULL; list_add(&data->pages, &list); nreq++; - } else + } else { nfs_retry_commit(mds_pages, NULL, cinfo); + filelayout_retry_commit(cinfo, 0); + cinfo->completion_ops->error_cleanup(NFS_I(inode)); + return -ENOMEM; + } } nreq += alloc_ds_commits(cinfo, &list); @@ -1380,6 +1416,7 @@ static struct pnfs_layoutdriver_type filelayout_type = { .clear_request_commit = filelayout_clear_request_commit, .scan_commit_lists = filelayout_scan_commit_lists, .recover_commit_reqs = filelayout_recover_commit_reqs, + .search_commit_reqs = filelayout_search_commit_reqs, .commit_pagelist = filelayout_commit_pagelist, .read_pagelist = filelayout_read_pagelist, .write_pagelist = filelayout_write_pagelist, diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index 44bf0140a4c7..8540516f4d71 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -695,7 +695,7 @@ filelayout_get_device_info(struct inode *inode, if (pdev == NULL) return NULL; - pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags); + pages = 
kcalloc(max_pages, sizeof(struct page *), gfp_flags); if (pages == NULL) { kfree(pdev); return NULL; @@ -783,8 +783,8 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j) static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds) { might_sleep(); - wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING, - nfs_wait_bit_killable, TASK_KILLABLE); + wait_on_bit_action(&ds->ds_state, NFS4DS_CONNECTING, + nfs_wait_bit_killable, TASK_KILLABLE); } static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds) diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index b94f80420a58..880618a8b048 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -112,7 +112,7 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh, * if the dentry tree reaches them; however if the dentry already * exists, we'll pick it up at this point and use it as the root */ - ret = d_obtain_alias(inode); + ret = d_obtain_root(inode); if (IS_ERR(ret)) { dprintk("nfs_get_root: get root dentry failed\n"); goto out; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 9927913c97c2..577a36f0a510 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -75,7 +75,7 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr) * nfs_wait_bit_killable - helper for functions that are sleeping on bit locks * @word: long word containing the bit lock */ -int nfs_wait_bit_killable(void *word) +int nfs_wait_bit_killable(struct wait_bit_key *key) { if (fatal_signal_pending(current)) return -ERESTARTSYS; @@ -1002,6 +1002,15 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) } EXPORT_SYMBOL_GPL(nfs_revalidate_inode); +int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *inode) +{ + if (!(NFS_I(inode)->cache_validity & + (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL)) + && !nfs_attribute_cache_expired(inode)) + return NFS_STALE(inode) ? 
-ESTALE : 0; + return -ECHILD; +} + static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) { struct nfs_inode *nfsi = NFS_I(inode); @@ -1074,8 +1083,8 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) * the bit lock here if it looks like we're going to be doing that. */ for (;;) { - ret = wait_on_bit(bitlock, NFS_INO_INVALIDATING, - nfs_wait_bit_killable, TASK_KILLABLE); + ret = wait_on_bit_action(bitlock, NFS_INO_INVALIDATING, + nfs_wait_bit_killable, TASK_KILLABLE); if (ret) goto out; spin_lock(&inode->i_lock); @@ -1840,11 +1849,12 @@ EXPORT_SYMBOL_GPL(nfs_net_id); static int nfs_net_init(struct net *net) { nfs_clients_init(net); - return 0; + return nfs_fs_proc_net_init(net); } static void nfs_net_exit(struct net *net) { + nfs_fs_proc_net_exit(net); nfs_cleanup_cb_ident_idr(net); } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index f415cbf9f6c3..9056622d2230 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -195,7 +195,16 @@ extern struct rpc_clnt *nfs4_find_or_create_ds_client(struct nfs_client *, #ifdef CONFIG_PROC_FS extern int __init nfs_fs_proc_init(void); extern void nfs_fs_proc_exit(void); +extern int nfs_fs_proc_net_init(struct net *net); +extern void nfs_fs_proc_net_exit(struct net *net); #else +static inline int nfs_fs_proc_net_init(struct net *net) +{ + return 0; +} +static inline void nfs_fs_proc_net_exit(struct net *net) +{ +} static inline int nfs_fs_proc_init(void) { return 0; @@ -238,11 +247,11 @@ void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos); int nfs_iocounter_wait(struct nfs_io_counter *c); extern const struct nfs_pageio_ops nfs_pgio_rw_ops; -struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *); -void nfs_rw_header_free(struct nfs_pgio_header *); -void nfs_pgio_data_release(struct nfs_pgio_data *); +struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *); +void nfs_pgio_header_free(struct nfs_pgio_header 
*); +void nfs_pgio_data_destroy(struct nfs_pgio_header *); int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); -int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *, +int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_header *, const struct rpc_call_ops *, int, int); void nfs_free_request(struct nfs_page *req); @@ -348,7 +357,7 @@ extern int nfs_drop_inode(struct inode *); extern void nfs_clear_inode(struct inode *); extern void nfs_evict_inode(struct inode *); void nfs_zap_acl_cache(struct inode *inode); -extern int nfs_wait_bit_killable(void *word); +extern int nfs_wait_bit_killable(struct wait_bit_key *key); /* super.c */ extern const struct super_operations nfs_sops; @@ -442,6 +451,7 @@ int nfs_scan_commit(struct inode *inode, struct list_head *dst, void nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, struct nfs_commit_info *cinfo); +int nfs_write_need_commit(struct nfs_pgio_header *); int nfs_generic_commit_list(struct inode *inode, struct list_head *head, int how, struct nfs_commit_info *cinfo); void nfs_retry_commit(struct list_head *page_list, @@ -482,7 +492,7 @@ static inline void nfs_inode_dio_wait(struct inode *inode) extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); /* nfs4proc.c */ -extern void __nfs4_read_done_cb(struct nfs_pgio_data *); +extern void __nfs4_read_done_cb(struct nfs_pgio_header *); extern struct nfs_client *nfs4_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, const char *ip_addr); diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index 8ee1fab83268..ef221fb8a183 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -29,6 +29,9 @@ struct nfs_net { #endif spinlock_t nfs_client_lock; struct timespec boot_time; +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *proc_nfsfs; +#endif }; extern int nfs_net_id; diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 8f854dde4150..24c6898159cc 100644 --- a/fs/nfs/nfs3acl.c +++ 
b/fs/nfs/nfs3acl.c @@ -129,7 +129,10 @@ static int __nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, .rpc_argp = &args, .rpc_resp = &fattr, }; - int status; + int status = 0; + + if (acl == NULL && (!S_ISDIR(inode->i_mode) || dfacl == NULL)) + goto out; status = -EOPNOTSUPP; if (!nfs_server_capable(inode, NFS_CAP_ACLS)) @@ -256,7 +259,7 @@ nfs3_list_one_acl(struct inode *inode, int type, const char *name, void *data, char *p = data + *result; acl = get_acl(inode, type); - if (!acl) + if (IS_ERR_OR_NULL(acl)) return 0; posix_acl_release(acl); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index f0afa291fd58..809670eba52a 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -795,41 +795,44 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return status; } -static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { - struct inode *inode = data->header->inode; + struct inode *inode = hdr->inode; if (nfs3_async_handle_jukebox(task, inode)) return -EAGAIN; nfs_invalidate_atime(inode); - nfs_refresh_inode(inode, &data->fattr); + nfs_refresh_inode(inode, &hdr->fattr); return 0; } -static void nfs3_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) +static void nfs3_proc_read_setup(struct nfs_pgio_header *hdr, + struct rpc_message *msg) { msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ]; } -static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task, + struct nfs_pgio_header *hdr) { rpc_call_start(task); return 0; } -static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { - struct inode *inode = data->header->inode; + struct inode *inode = hdr->inode; if (nfs3_async_handle_jukebox(task, inode)) return -EAGAIN; if 
(task->tk_status >= 0) - nfs_post_op_update_inode_force_wcc(inode, data->res.fattr); + nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr); return 0; } -static void nfs3_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) +static void nfs3_proc_write_setup(struct nfs_pgio_header *hdr, + struct rpc_message *msg) { msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; } diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index ba2affa51941..a8b855ab4e22 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -54,7 +54,7 @@ struct nfs4_minor_version_ops { const nfs4_stateid *); int (*find_root_sec)(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); - int (*free_lock_state)(struct nfs_server *, + void (*free_lock_state)(struct nfs_server *, struct nfs4_lock_state *); const struct rpc_call_ops *call_sync_ops; const struct nfs4_state_recovery_ops *reboot_recovery_ops; @@ -129,17 +129,6 @@ enum { * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN) */ -struct nfs4_lock_owner { - unsigned int lo_type; -#define NFS4_ANY_LOCK_TYPE (0U) -#define NFS4_FLOCK_LOCK_TYPE (1U << 0) -#define NFS4_POSIX_LOCK_TYPE (1U << 1) - union { - fl_owner_t posix_owner; - pid_t flock_owner; - } lo_u; -}; - struct nfs4_lock_state { struct list_head ls_locks; /* Other lock stateids */ struct nfs4_state * ls_state; /* Pointer to open state */ @@ -149,7 +138,7 @@ struct nfs4_lock_state { struct nfs_seqid_counter ls_seqid; nfs4_stateid ls_stateid; atomic_t ls_count; - struct nfs4_lock_owner ls_owner; + fl_owner_t ls_owner; }; /* bits for nfs4_state->flags */ @@ -337,11 +326,11 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode, */ static inline void nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, - struct rpc_message *msg, struct nfs_pgio_data *wdata) + struct rpc_message *msg, struct nfs_pgio_header *hdr) { if (_nfs4_state_protect(clp, NFS_SP4_MACH_CRED_WRITE, clntp, msg) && !test_bit(NFS_SP4_MACH_CRED_COMMIT, 
&clp->cl_sp4_flags)) - wdata->args.stable = NFS_FILE_SYNC; + hdr->args.stable = NFS_FILE_SYNC; } #else /* CONFIG_NFS_v4_1 */ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server) @@ -369,7 +358,7 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_flags, static inline void nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, - struct rpc_message *msg, struct nfs_pgio_data *wdata) + struct rpc_message *msg, struct nfs_pgio_header *hdr) { } #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index aa9ef4876046..ffdb28d86cf8 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -482,6 +482,16 @@ int nfs40_walk_client_list(struct nfs_client *new, spin_lock(&nn->nfs_client_lock); list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) { + + if (pos->rpc_ops != new->rpc_ops) + continue; + + if (pos->cl_proto != new->cl_proto) + continue; + + if (pos->cl_minorversion != new->cl_minorversion) + continue; + /* If "pos" isn't marked ready, we can't trust the * remaining fields in "pos" */ if (pos->cl_cons_state > NFS_CS_READY) { @@ -501,15 +511,6 @@ int nfs40_walk_client_list(struct nfs_client *new, if (pos->cl_cons_state != NFS_CS_READY) continue; - if (pos->rpc_ops != new->rpc_ops) - continue; - - if (pos->cl_proto != new->cl_proto) - continue; - - if (pos->cl_minorversion != new->cl_minorversion) - continue; - if (pos->cl_clientid != new->cl_clientid) continue; @@ -622,6 +623,16 @@ int nfs41_walk_client_list(struct nfs_client *new, spin_lock(&nn->nfs_client_lock); list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) { + + if (pos->rpc_ops != new->rpc_ops) + continue; + + if (pos->cl_proto != new->cl_proto) + continue; + + if (pos->cl_minorversion != new->cl_minorversion) + continue; + /* If "pos" isn't marked ready, we can't trust the * remaining fields in "pos", especially the client * ID and serverowner fields. 
Wait for CREATE_SESSION @@ -647,15 +658,6 @@ int nfs41_walk_client_list(struct nfs_client *new, if (pos->cl_cons_state != NFS_CS_READY) continue; - if (pos->rpc_ops != new->rpc_ops) - continue; - - if (pos->cl_proto != new->cl_proto) - continue; - - if (pos->cl_minorversion != new->cl_minorversion) - continue; - if (!nfs4_match_clientids(pos, new)) continue; @@ -855,6 +857,11 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, }; struct rpc_timeout ds_timeout; struct nfs_client *clp; + char buf[INET6_ADDRSTRLEN + 1]; + + if (rpc_ntop(ds_addr, buf, sizeof(buf)) <= 0) + return ERR_PTR(-EINVAL); + cl_init.hostname = buf; /* * Set an authflavor equual to the MDS value. Use the MDS nfs_client diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4bf3d97cc5a0..6ca0c8e7a945 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1952,6 +1952,14 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data) return status; } +/* + * Additional permission checks in order to distinguish between an + * open for read, and an open for execute. This works around the + * fact that NFSv4 OPEN treats read and execute permissions as being + * the same. + * Note that in the non-execute case, we want to turn off permission + * checking if we just created a new file (POSIX open() semantics). + */ static int nfs4_opendata_access(struct rpc_cred *cred, struct nfs4_opendata *opendata, struct nfs4_state *state, fmode_t fmode, @@ -1966,14 +1974,14 @@ static int nfs4_opendata_access(struct rpc_cred *cred, return 0; mask = 0; - /* don't check MAY_WRITE - a newly created file may not have - * write mode bits, but POSIX allows the creating process to write. - * use openflags to check for exec, because fmode won't - * always have FMODE_EXEC set when file open for exec. */ + /* + * Use openflags to check for exec, because fmode won't + * always have FMODE_EXEC set when file open for exec. 
+ */ if (openflags & __FMODE_EXEC) { /* ONLY check for exec rights */ mask = MAY_EXEC; - } else if (fmode & FMODE_READ) + } else if ((fmode & FMODE_READ) && !opendata->file_created) mask = MAY_READ; cache.cred = cred; @@ -2216,8 +2224,19 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); ret = _nfs4_proc_open(opendata); - if (ret != 0) + if (ret != 0) { + if (ret == -ENOENT) { + dentry = opendata->dentry; + if (dentry->d_inode) + d_delete(dentry); + else if (d_unhashed(dentry)) + d_add(dentry, NULL); + + nfs_set_verifier(dentry, + nfs_save_change_attribute(opendata->dir->d_inode)); + } goto out; + } state = nfs4_opendata_to_nfs4_state(opendata); ret = PTR_ERR(state); @@ -2545,6 +2564,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) struct nfs4_closedata *calldata = data; struct nfs4_state *state = calldata->state; struct nfs_server *server = NFS_SERVER(calldata->inode); + nfs4_stateid *res_stateid = NULL; dprintk("%s: begin!\n", __func__); if (!nfs4_sequence_done(task, &calldata->res.seq_res)) @@ -2555,12 +2575,12 @@ static void nfs4_close_done(struct rpc_task *task, void *data) */ switch (task->tk_status) { case 0: - if (calldata->roc) + res_stateid = &calldata->res.stateid; + if (calldata->arg.fmode == 0 && calldata->roc) pnfs_roc_set_barrier(state->inode, calldata->roc_barrier); - nfs_clear_open_stateid(state, &calldata->res.stateid, 0); renew_lease(server, calldata->timestamp); - goto out_release; + break; case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_OLD_STATEID: @@ -2574,7 +2594,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) goto out_release; } } - nfs_clear_open_stateid(state, NULL, calldata->arg.fmode); + nfs_clear_open_stateid(state, res_stateid, calldata->arg.fmode); out_release: nfs_release_seqid(calldata->arg.seqid); nfs_refresh_inode(calldata->inode, calldata->res.fattr); @@ -2586,6 +2606,7 @@ static void 
nfs4_close_prepare(struct rpc_task *task, void *data) struct nfs4_closedata *calldata = data; struct nfs4_state *state = calldata->state; struct inode *inode = calldata->inode; + bool is_rdonly, is_wronly, is_rdwr; int call_close = 0; dprintk("%s: begin!\n", __func__); @@ -2593,21 +2614,27 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) goto out_wait; task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; - calldata->arg.fmode = FMODE_READ|FMODE_WRITE; spin_lock(&state->owner->so_lock); + is_rdwr = test_bit(NFS_O_RDWR_STATE, &state->flags); + is_rdonly = test_bit(NFS_O_RDONLY_STATE, &state->flags); + is_wronly = test_bit(NFS_O_WRONLY_STATE, &state->flags); /* Calculate the change in open mode */ + calldata->arg.fmode = 0; if (state->n_rdwr == 0) { - if (state->n_rdonly == 0) { - call_close |= test_bit(NFS_O_RDONLY_STATE, &state->flags); - call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags); - calldata->arg.fmode &= ~FMODE_READ; - } - if (state->n_wronly == 0) { - call_close |= test_bit(NFS_O_WRONLY_STATE, &state->flags); - call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags); - calldata->arg.fmode &= ~FMODE_WRITE; - } - } + if (state->n_rdonly == 0) + call_close |= is_rdonly; + else if (is_rdonly) + calldata->arg.fmode |= FMODE_READ; + if (state->n_wronly == 0) + call_close |= is_wronly; + else if (is_wronly) + calldata->arg.fmode |= FMODE_WRITE; + } else if (is_rdwr) + calldata->arg.fmode |= FMODE_READ|FMODE_WRITE; + + if (calldata->arg.fmode == 0) + call_close |= is_rdwr; + if (!nfs4_valid_open_stateid(state)) call_close = 0; spin_unlock(&state->owner->so_lock); @@ -2647,6 +2674,48 @@ static const struct rpc_call_ops nfs4_close_ops = { .rpc_release = nfs4_free_closedata, }; +static bool nfs4_state_has_opener(struct nfs4_state *state) +{ + /* first check existing openers */ + if (test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0 && + state->n_rdonly != 0) + return true; + + if (test_bit(NFS_O_WRONLY_STATE, &state->flags) 
!= 0 && + state->n_wronly != 0) + return true; + + if (test_bit(NFS_O_RDWR_STATE, &state->flags) != 0 && + state->n_rdwr != 0) + return true; + + return false; +} + +static bool nfs4_roc(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_open_context *ctx; + struct nfs4_state *state; + + spin_lock(&inode->i_lock); + list_for_each_entry(ctx, &nfsi->open_files, list) { + state = ctx->state; + if (state == NULL) + continue; + if (nfs4_state_has_opener(state)) { + spin_unlock(&inode->i_lock); + return false; + } + } + spin_unlock(&inode->i_lock); + + if (nfs4_check_delegation(inode, FMODE_READ)) + return false; + + return pnfs_roc(inode); +} + /* * It is possible for data to be read/written from a mem-mapped file * after the sys_close call (which hits the vfs layer as a flush). @@ -2697,7 +2766,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) calldata->res.fattr = &calldata->fattr; calldata->res.seqid = calldata->arg.seqid; calldata->res.server = server; - calldata->roc = pnfs_roc(state->inode); + calldata->roc = nfs4_roc(state->inode); nfs_sb_active(calldata->inode->i_sb); msg.rpc_argp = &calldata->arg; @@ -4033,24 +4102,25 @@ static bool nfs4_error_stateid_expired(int err) return false; } -void __nfs4_read_done_cb(struct nfs_pgio_data *data) +void __nfs4_read_done_cb(struct nfs_pgio_header *hdr) { - nfs_invalidate_atime(data->header->inode); + nfs_invalidate_atime(hdr->inode); } -static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr) { - struct nfs_server *server = NFS_SERVER(data->header->inode); + struct nfs_server *server = NFS_SERVER(hdr->inode); - trace_nfs4_read(data, task->tk_status); - if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { + trace_nfs4_read(hdr, task->tk_status); + if (nfs4_async_handle_error(task, server, + hdr->args.context->state) == -EAGAIN) { 
rpc_restart_call_prepare(task); return -EAGAIN; } - __nfs4_read_done_cb(data); + __nfs4_read_done_cb(hdr); if (task->tk_status > 0) - renew_lease(server, data->timestamp); + renew_lease(server, hdr->timestamp); return 0; } @@ -4068,54 +4138,59 @@ static bool nfs4_read_stateid_changed(struct rpc_task *task, return true; } -static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { dprintk("--> %s\n", __func__); - if (!nfs4_sequence_done(task, &data->res.seq_res)) + if (!nfs4_sequence_done(task, &hdr->res.seq_res)) return -EAGAIN; - if (nfs4_read_stateid_changed(task, &data->args)) + if (nfs4_read_stateid_changed(task, &hdr->args)) return -EAGAIN; - return data->pgio_done_cb ? data->pgio_done_cb(task, data) : - nfs4_read_done_cb(task, data); + return hdr->pgio_done_cb ? hdr->pgio_done_cb(task, hdr) : + nfs4_read_done_cb(task, hdr); } -static void nfs4_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) +static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr, + struct rpc_message *msg) { - data->timestamp = jiffies; - data->pgio_done_cb = nfs4_read_done_cb; + hdr->timestamp = jiffies; + hdr->pgio_done_cb = nfs4_read_done_cb; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; - nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); + nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0); } -static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, + struct nfs_pgio_header *hdr) { - if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), - &data->args.seq_args, - &data->res.seq_res, + if (nfs4_setup_sequence(NFS_SERVER(hdr->inode), + &hdr->args.seq_args, + &hdr->res.seq_res, task)) return 0; - if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context, - data->args.lock_context, data->header->rw_ops->rw_mode) == -EIO) + if 
(nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, + hdr->args.lock_context, + hdr->rw_ops->rw_mode) == -EIO) return -EIO; - if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags))) + if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) return -EIO; return 0; } -static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs4_write_done_cb(struct rpc_task *task, + struct nfs_pgio_header *hdr) { - struct inode *inode = data->header->inode; + struct inode *inode = hdr->inode; - trace_nfs4_write(data, task->tk_status); - if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { + trace_nfs4_write(hdr, task->tk_status); + if (nfs4_async_handle_error(task, NFS_SERVER(inode), + hdr->args.context->state) == -EAGAIN) { rpc_restart_call_prepare(task); return -EAGAIN; } if (task->tk_status >= 0) { - renew_lease(NFS_SERVER(inode), data->timestamp); - nfs_post_op_update_inode_force_wcc(inode, &data->fattr); + renew_lease(NFS_SERVER(inode), hdr->timestamp); + nfs_post_op_update_inode_force_wcc(inode, &hdr->fattr); } return 0; } @@ -4134,23 +4209,21 @@ static bool nfs4_write_stateid_changed(struct rpc_task *task, return true; } -static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { - if (!nfs4_sequence_done(task, &data->res.seq_res)) + if (!nfs4_sequence_done(task, &hdr->res.seq_res)) return -EAGAIN; - if (nfs4_write_stateid_changed(task, &data->args)) + if (nfs4_write_stateid_changed(task, &hdr->args)) return -EAGAIN; - return data->pgio_done_cb ? data->pgio_done_cb(task, data) : - nfs4_write_done_cb(task, data); + return hdr->pgio_done_cb ? 
hdr->pgio_done_cb(task, hdr) : + nfs4_write_done_cb(task, hdr); } static -bool nfs4_write_need_cache_consistency_data(const struct nfs_pgio_data *data) +bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr) { - const struct nfs_pgio_header *hdr = data->header; - /* Don't request attributes for pNFS or O_DIRECT writes */ - if (data->ds_clp != NULL || hdr->dreq != NULL) + if (hdr->ds_clp != NULL || hdr->dreq != NULL) return false; /* Otherwise, request attributes if and only if we don't hold * a delegation @@ -4158,23 +4231,24 @@ bool nfs4_write_need_cache_consistency_data(const struct nfs_pgio_data *data) return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0; } -static void nfs4_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) +static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr, + struct rpc_message *msg) { - struct nfs_server *server = NFS_SERVER(data->header->inode); + struct nfs_server *server = NFS_SERVER(hdr->inode); - if (!nfs4_write_need_cache_consistency_data(data)) { - data->args.bitmask = NULL; - data->res.fattr = NULL; + if (!nfs4_write_need_cache_consistency_data(hdr)) { + hdr->args.bitmask = NULL; + hdr->res.fattr = NULL; } else - data->args.bitmask = server->cache_consistency_bitmask; + hdr->args.bitmask = server->cache_consistency_bitmask; - if (!data->pgio_done_cb) - data->pgio_done_cb = nfs4_write_done_cb; - data->res.server = server; - data->timestamp = jiffies; + if (!hdr->pgio_done_cb) + hdr->pgio_done_cb = nfs4_write_done_cb; + hdr->res.server = server; + hdr->timestamp = jiffies; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; - nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); + nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 1); } static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) @@ -4881,6 +4955,18 @@ nfs4_init_callback_netid(const struct nfs_client *clp, char *buf, size_t len) return scnprintf(buf, len, "tcp"); 
} +static void nfs4_setclientid_done(struct rpc_task *task, void *calldata) +{ + struct nfs4_setclientid *sc = calldata; + + if (task->tk_status == 0) + sc->sc_cred = get_rpccred(task->tk_rqstp->rq_cred); +} + +static const struct rpc_call_ops nfs4_setclientid_ops = { + .rpc_call_done = nfs4_setclientid_done, +}; + /** * nfs4_proc_setclientid - Negotiate client ID * @clp: state data structure @@ -4907,6 +4993,14 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, .rpc_resp = res, .rpc_cred = cred, }; + struct rpc_task *task; + struct rpc_task_setup task_setup_data = { + .rpc_client = clp->cl_rpcclient, + .rpc_message = &msg, + .callback_ops = &nfs4_setclientid_ops, + .callback_data = &setclientid, + .flags = RPC_TASK_TIMEOUT, + }; int status; /* nfs_client_id4 */ @@ -4933,7 +5027,18 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, dprintk("NFS call setclientid auth=%s, '%.*s'\n", clp->cl_rpcclient->cl_auth->au_ops->au_name, setclientid.sc_name_len, setclientid.sc_name); - status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) { + status = PTR_ERR(task); + goto out; + } + status = task->tk_status; + if (setclientid.sc_cred) { + clp->cl_acceptor = rpcauth_stringify_acceptor(setclientid.sc_cred); + put_rpccred(setclientid.sc_cred); + } + rpc_put_task(task); +out: trace_nfs4_setclientid(clp, status); dprintk("NFS reply setclientid: %d\n", status); return status; @@ -4975,6 +5080,9 @@ struct nfs4_delegreturndata { unsigned long timestamp; struct nfs_fattr fattr; int rpc_status; + struct inode *inode; + bool roc; + u32 roc_barrier; }; static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) @@ -4988,7 +5096,6 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) switch (task->tk_status) { case 0: renew_lease(data->res.server, data->timestamp); - break; case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_DELEG_REVOKED: case 
-NFS4ERR_BAD_STATEID: @@ -4996,6 +5103,8 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: task->tk_status = 0; + if (data->roc) + pnfs_roc_set_barrier(data->inode, data->roc_barrier); break; default: if (nfs4_async_handle_error(task, data->res.server, NULL) == @@ -5009,6 +5118,10 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) static void nfs4_delegreturn_release(void *calldata) { + struct nfs4_delegreturndata *data = calldata; + + if (data->roc) + pnfs_roc_release(data->inode); kfree(calldata); } @@ -5018,6 +5131,10 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data) d_data = (struct nfs4_delegreturndata *)data; + if (d_data->roc && + pnfs_roc_drain(d_data->inode, &d_data->roc_barrier, task)) + return; + nfs4_setup_sequence(d_data->res.server, &d_data->args.seq_args, &d_data->res.seq_res, @@ -5061,6 +5178,9 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co nfs_fattr_init(data->res.fattr); data->timestamp = jiffies; data->rpc_status = 0; + data->inode = inode; + data->roc = list_empty(&NFS_I(inode)->open_files) ? 
+ pnfs_roc(inode) : false; task_setup_data.callback_data = data; msg.rpc_argp = &data->args; @@ -5834,8 +5954,10 @@ struct nfs_release_lockowner_data { static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata) { struct nfs_release_lockowner_data *data = calldata; - nfs40_setup_sequence(data->server, - &data->args.seq_args, &data->res.seq_res, task); + struct nfs_server *server = data->server; + nfs40_setup_sequence(server, &data->args.seq_args, + &data->res.seq_res, task); + data->args.lock_owner.clientid = server->nfs_client->cl_clientid; data->timestamp = jiffies; } @@ -5852,6 +5974,8 @@ static void nfs4_release_lockowner_done(struct rpc_task *task, void *calldata) break; case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_EXPIRED: + nfs4_schedule_lease_recovery(server->nfs_client); + break; case -NFS4ERR_LEASE_MOVED: case -NFS4ERR_DELAY: if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) @@ -5872,7 +5996,8 @@ static const struct rpc_call_ops nfs4_release_lockowner_ops = { .rpc_release = nfs4_release_lockowner_release, }; -static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp) +static void +nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp) { struct nfs_release_lockowner_data *data; struct rpc_message msg = { @@ -5880,11 +6005,11 @@ static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_st }; if (server->nfs_client->cl_mvops->minor_version != 0) - return -EINVAL; + return; data = kmalloc(sizeof(*data), GFP_NOFS); if (!data) - return -ENOMEM; + return; data->lsp = lsp; data->server = server; data->args.lock_owner.clientid = server->nfs_client->cl_clientid; @@ -5895,7 +6020,6 @@ static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_st msg.rpc_resp = &data->res; nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data); - return 0; } #define 
XATTR_NAME_NFSV4_ACL "system.nfs4_acl" @@ -8182,7 +8306,8 @@ static int nfs41_free_stateid(struct nfs_server *server, return ret; } -static int nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp) +static void +nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp) { struct rpc_task *task; struct rpc_cred *cred = lsp->ls_state->owner->so_cred; @@ -8190,9 +8315,8 @@ static int nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_sta task = _nfs41_free_stateid(server, &lsp->ls_stateid, cred, false); nfs4_free_lock_state(server, lsp); if (IS_ERR(task)) - return PTR_ERR(task); + return; rpc_put_task(task); - return 0; } static bool nfs41_match_stateid(const nfs4_stateid *s1, diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 848f6853c59e..22fe35104c0c 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -787,21 +787,12 @@ void nfs4_close_sync(struct nfs4_state *state, fmode_t fmode) * that is compatible with current->files */ static struct nfs4_lock_state * -__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type) +__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) { struct nfs4_lock_state *pos; list_for_each_entry(pos, &state->lock_states, ls_locks) { - if (type != NFS4_ANY_LOCK_TYPE && pos->ls_owner.lo_type != type) + if (pos->ls_owner != fl_owner) continue; - switch (pos->ls_owner.lo_type) { - case NFS4_POSIX_LOCK_TYPE: - if (pos->ls_owner.lo_u.posix_owner != fl_owner) - continue; - break; - case NFS4_FLOCK_LOCK_TYPE: - if (pos->ls_owner.lo_u.flock_owner != fl_pid) - continue; - } atomic_inc(&pos->ls_count); return pos; } @@ -813,7 +804,7 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_p * exists, return an uninitialized one. 
* */ -static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type) +static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) { struct nfs4_lock_state *lsp; struct nfs_server *server = state->owner->so_server; @@ -824,17 +815,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f nfs4_init_seqid_counter(&lsp->ls_seqid); atomic_set(&lsp->ls_count, 1); lsp->ls_state = state; - lsp->ls_owner.lo_type = type; - switch (lsp->ls_owner.lo_type) { - case NFS4_FLOCK_LOCK_TYPE: - lsp->ls_owner.lo_u.flock_owner = fl_pid; - break; - case NFS4_POSIX_LOCK_TYPE: - lsp->ls_owner.lo_u.posix_owner = fl_owner; - break; - default: - goto out_free; - } + lsp->ls_owner = fl_owner; lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id, 0, 0, GFP_NOFS); if (lsp->ls_seqid.owner_id < 0) goto out_free; @@ -857,13 +838,13 @@ void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp * exists, return an uninitialized one. 
* */ -static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner, pid_t pid, unsigned int type) +static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner) { struct nfs4_lock_state *lsp, *new = NULL; for(;;) { spin_lock(&state->state_lock); - lsp = __nfs4_find_lock_state(state, owner, pid, type); + lsp = __nfs4_find_lock_state(state, owner); if (lsp != NULL) break; if (new != NULL) { @@ -874,7 +855,7 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_ break; } spin_unlock(&state->state_lock); - new = nfs4_alloc_lock_state(state, owner, pid, type); + new = nfs4_alloc_lock_state(state, owner); if (new == NULL) return NULL; } @@ -935,13 +916,7 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl) if (fl->fl_ops != NULL) return 0; - if (fl->fl_flags & FL_POSIX) - lsp = nfs4_get_lock_state(state, fl->fl_owner, 0, NFS4_POSIX_LOCK_TYPE); - else if (fl->fl_flags & FL_FLOCK) - lsp = nfs4_get_lock_state(state, NULL, fl->fl_pid, - NFS4_FLOCK_LOCK_TYPE); - else - return -EINVAL; + lsp = nfs4_get_lock_state(state, fl->fl_owner); if (lsp == NULL) return -ENOMEM; fl->fl_u.nfs4_fl.owner = lsp; @@ -955,7 +930,6 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst, { struct nfs4_lock_state *lsp; fl_owner_t fl_owner; - pid_t fl_pid; int ret = -ENOENT; @@ -966,9 +940,8 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst, goto out; fl_owner = lockowner->l_owner; - fl_pid = lockowner->l_pid; spin_lock(&state->state_lock); - lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE); + lsp = __nfs4_find_lock_state(state, fl_owner); if (lsp && test_bit(NFS_LOCK_LOST, &lsp->ls_flags)) ret = -EIO; else if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) { @@ -1251,8 +1224,8 @@ int nfs4_wait_clnt_recover(struct nfs_client *clp) might_sleep(); atomic_inc(&clp->cl_count); - res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING, 
- nfs_wait_bit_killable, TASK_KILLABLE); + res = wait_on_bit_action(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING, + nfs_wait_bit_killable, TASK_KILLABLE); if (res) goto out; if (clp->cl_cons_state < 0) diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 0a744f3a86f6..1c32adbe728d 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -932,11 +932,11 @@ DEFINE_NFS4_IDMAP_EVENT(nfs4_map_gid_to_group); DECLARE_EVENT_CLASS(nfs4_read_event, TP_PROTO( - const struct nfs_pgio_data *data, + const struct nfs_pgio_header *hdr, int error ), - TP_ARGS(data, error), + TP_ARGS(hdr, error), TP_STRUCT__entry( __field(dev_t, dev) @@ -948,12 +948,12 @@ DECLARE_EVENT_CLASS(nfs4_read_event, ), TP_fast_assign( - const struct inode *inode = data->header->inode; + const struct inode *inode = hdr->inode; __entry->dev = inode->i_sb->s_dev; __entry->fileid = NFS_FILEID(inode); __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); - __entry->offset = data->args.offset; - __entry->count = data->args.count; + __entry->offset = hdr->args.offset; + __entry->count = hdr->args.count; __entry->error = error; ), @@ -972,10 +972,10 @@ DECLARE_EVENT_CLASS(nfs4_read_event, #define DEFINE_NFS4_READ_EVENT(name) \ DEFINE_EVENT(nfs4_read_event, name, \ TP_PROTO( \ - const struct nfs_pgio_data *data, \ + const struct nfs_pgio_header *hdr, \ int error \ ), \ - TP_ARGS(data, error)) + TP_ARGS(hdr, error)) DEFINE_NFS4_READ_EVENT(nfs4_read); #ifdef CONFIG_NFS_V4_1 DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read); @@ -983,11 +983,11 @@ DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read); DECLARE_EVENT_CLASS(nfs4_write_event, TP_PROTO( - const struct nfs_pgio_data *data, + const struct nfs_pgio_header *hdr, int error ), - TP_ARGS(data, error), + TP_ARGS(hdr, error), TP_STRUCT__entry( __field(dev_t, dev) @@ -999,12 +999,12 @@ DECLARE_EVENT_CLASS(nfs4_write_event, ), TP_fast_assign( - const struct inode *inode = data->header->inode; + const struct inode *inode = hdr->inode; __entry->dev = inode->i_sb->s_dev; __entry->fileid = 
NFS_FILEID(inode); __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); - __entry->offset = data->args.offset; - __entry->count = data->args.count; + __entry->offset = hdr->args.offset; + __entry->count = hdr->args.count; __entry->error = error; ), @@ -1024,10 +1024,10 @@ DECLARE_EVENT_CLASS(nfs4_write_event, #define DEFINE_NFS4_WRITE_EVENT(name) \ DEFINE_EVENT(nfs4_write_event, name, \ TP_PROTO( \ - const struct nfs_pgio_data *data, \ + const struct nfs_pgio_header *hdr, \ int error \ ), \ - TP_ARGS(data, error)) + TP_ARGS(hdr, error)) DEFINE_NFS4_WRITE_EVENT(nfs4_write); #ifdef CONFIG_NFS_V4_1 DEFINE_NFS4_WRITE_EVENT(nfs4_pnfs_write); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 939ae606cfa4..e13b59d8d9aa 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -7092,7 +7092,7 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, if (!status) status = decode_sequence(xdr, &res->seq_res, rqstp); if (!status) - status = decode_reclaim_complete(xdr, (void *)NULL); + status = decode_reclaim_complete(xdr, NULL); return status; } diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 611320753db2..ae05278b3761 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -439,22 +439,21 @@ static void _read_done(struct ore_io_state *ios, void *private) objlayout_read_done(&objios->oir, status, objios->sync); } -int objio_read_pagelist(struct nfs_pgio_data *rdata) +int objio_read_pagelist(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = rdata->header; struct objio_state *objios; int ret; ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true, - hdr->lseg, rdata->args.pages, rdata->args.pgbase, - rdata->args.offset, rdata->args.count, rdata, + hdr->lseg, hdr->args.pages, hdr->args.pgbase, + hdr->args.offset, hdr->args.count, hdr, GFP_KERNEL, &objios); if (unlikely(ret)) return ret; objios->ios->done = _read_done; dprintk("%s: offset=0x%llx length=0x%x\n", __func__, - rdata->args.offset, 
rdata->args.count); + hdr->args.offset, hdr->args.count); ret = ore_read(objios->ios); if (unlikely(ret)) objio_free_result(&objios->oir); @@ -487,11 +486,11 @@ static void _write_done(struct ore_io_state *ios, void *private) static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) { struct objio_state *objios = priv; - struct nfs_pgio_data *wdata = objios->oir.rpcdata; - struct address_space *mapping = wdata->header->inode->i_mapping; + struct nfs_pgio_header *hdr = objios->oir.rpcdata; + struct address_space *mapping = hdr->inode->i_mapping; pgoff_t index = offset / PAGE_SIZE; struct page *page; - loff_t i_size = i_size_read(wdata->header->inode); + loff_t i_size = i_size_read(hdr->inode); if (offset >= i_size) { *uptodate = true; @@ -531,15 +530,14 @@ static const struct _ore_r4w_op _r4w_op = { .put_page = &__r4w_put_page, }; -int objio_write_pagelist(struct nfs_pgio_data *wdata, int how) +int objio_write_pagelist(struct nfs_pgio_header *hdr, int how) { - struct nfs_pgio_header *hdr = wdata->header; struct objio_state *objios; int ret; ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false, - hdr->lseg, wdata->args.pages, wdata->args.pgbase, - wdata->args.offset, wdata->args.count, wdata, GFP_NOFS, + hdr->lseg, hdr->args.pages, hdr->args.pgbase, + hdr->args.offset, hdr->args.count, hdr, GFP_NOFS, &objios); if (unlikely(ret)) return ret; @@ -551,7 +549,7 @@ int objio_write_pagelist(struct nfs_pgio_data *wdata, int how) objios->ios->done = _write_done; dprintk("%s: offset=0x%llx length=0x%x\n", __func__, - wdata->args.offset, wdata->args.count); + hdr->args.offset, hdr->args.count); ret = ore_write(objios->ios); if (unlikely(ret)) { objio_free_result(&objios->oir); diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index 765d3f54e986..697a16d11fac 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -229,36 +229,36 @@ objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index, static 
void _rpc_read_complete(struct work_struct *work) { struct rpc_task *task; - struct nfs_pgio_data *rdata; + struct nfs_pgio_header *hdr; dprintk("%s enter\n", __func__); task = container_of(work, struct rpc_task, u.tk_work); - rdata = container_of(task, struct nfs_pgio_data, task); + hdr = container_of(task, struct nfs_pgio_header, task); - pnfs_ld_read_done(rdata); + pnfs_ld_read_done(hdr); } void objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) { - struct nfs_pgio_data *rdata = oir->rpcdata; + struct nfs_pgio_header *hdr = oir->rpcdata; - oir->status = rdata->task.tk_status = status; + oir->status = hdr->task.tk_status = status; if (status >= 0) - rdata->res.count = status; + hdr->res.count = status; else - rdata->header->pnfs_error = status; + hdr->pnfs_error = status; objlayout_iodone(oir); /* must not use oir after this point */ dprintk("%s: Return status=%zd eof=%d sync=%d\n", __func__, - status, rdata->res.eof, sync); + status, hdr->res.eof, sync); if (sync) - pnfs_ld_read_done(rdata); + pnfs_ld_read_done(hdr); else { - INIT_WORK(&rdata->task.u.tk_work, _rpc_read_complete); - schedule_work(&rdata->task.u.tk_work); + INIT_WORK(&hdr->task.u.tk_work, _rpc_read_complete); + schedule_work(&hdr->task.u.tk_work); } } @@ -266,12 +266,11 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) * Perform sync or async reads. 
*/ enum pnfs_try_status -objlayout_read_pagelist(struct nfs_pgio_data *rdata) +objlayout_read_pagelist(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = rdata->header; struct inode *inode = hdr->inode; - loff_t offset = rdata->args.offset; - size_t count = rdata->args.count; + loff_t offset = hdr->args.offset; + size_t count = hdr->args.count; int err; loff_t eof; @@ -279,23 +278,23 @@ objlayout_read_pagelist(struct nfs_pgio_data *rdata) if (unlikely(offset + count > eof)) { if (offset >= eof) { err = 0; - rdata->res.count = 0; - rdata->res.eof = 1; + hdr->res.count = 0; + hdr->res.eof = 1; /*FIXME: do we need to call pnfs_ld_read_done() */ goto out; } count = eof - offset; } - rdata->res.eof = (offset + count) >= eof; - _fix_verify_io_params(hdr->lseg, &rdata->args.pages, - &rdata->args.pgbase, - rdata->args.offset, rdata->args.count); + hdr->res.eof = (offset + count) >= eof; + _fix_verify_io_params(hdr->lseg, &hdr->args.pages, + &hdr->args.pgbase, + hdr->args.offset, hdr->args.count); dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n", - __func__, inode->i_ino, offset, count, rdata->res.eof); + __func__, inode->i_ino, offset, count, hdr->res.eof); - err = objio_read_pagelist(rdata); + err = objio_read_pagelist(hdr); out: if (unlikely(err)) { hdr->pnfs_error = err; @@ -312,38 +311,38 @@ objlayout_read_pagelist(struct nfs_pgio_data *rdata) static void _rpc_write_complete(struct work_struct *work) { struct rpc_task *task; - struct nfs_pgio_data *wdata; + struct nfs_pgio_header *hdr; dprintk("%s enter\n", __func__); task = container_of(work, struct rpc_task, u.tk_work); - wdata = container_of(task, struct nfs_pgio_data, task); + hdr = container_of(task, struct nfs_pgio_header, task); - pnfs_ld_write_done(wdata); + pnfs_ld_write_done(hdr); } void objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) { - struct nfs_pgio_data *wdata = oir->rpcdata; + struct nfs_pgio_header *hdr = oir->rpcdata; - oir->status = 
wdata->task.tk_status = status; + oir->status = hdr->task.tk_status = status; if (status >= 0) { - wdata->res.count = status; - wdata->verf.committed = oir->committed; + hdr->res.count = status; + hdr->verf.committed = oir->committed; } else { - wdata->header->pnfs_error = status; + hdr->pnfs_error = status; } objlayout_iodone(oir); /* must not use oir after this point */ dprintk("%s: Return status %zd committed %d sync=%d\n", __func__, - status, wdata->verf.committed, sync); + status, hdr->verf.committed, sync); if (sync) - pnfs_ld_write_done(wdata); + pnfs_ld_write_done(hdr); else { - INIT_WORK(&wdata->task.u.tk_work, _rpc_write_complete); - schedule_work(&wdata->task.u.tk_work); + INIT_WORK(&hdr->task.u.tk_work, _rpc_write_complete); + schedule_work(&hdr->task.u.tk_work); } } @@ -351,17 +350,15 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) * Perform sync or async writes. */ enum pnfs_try_status -objlayout_write_pagelist(struct nfs_pgio_data *wdata, - int how) +objlayout_write_pagelist(struct nfs_pgio_header *hdr, int how) { - struct nfs_pgio_header *hdr = wdata->header; int err; - _fix_verify_io_params(hdr->lseg, &wdata->args.pages, - &wdata->args.pgbase, - wdata->args.offset, wdata->args.count); + _fix_verify_io_params(hdr->lseg, &hdr->args.pages, + &hdr->args.pgbase, + hdr->args.offset, hdr->args.count); - err = objio_write_pagelist(wdata, how); + err = objio_write_pagelist(hdr, how); if (unlikely(err)) { hdr->pnfs_error = err; dprintk("%s: Returned Error %d\n", __func__, err); diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index 01e041029a6c..fd13f1d2f136 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h @@ -119,8 +119,8 @@ extern void objio_free_lseg(struct pnfs_layout_segment *lseg); */ extern void objio_free_result(struct objlayout_io_res *oir); -extern int objio_read_pagelist(struct nfs_pgio_data *rdata); -extern int objio_write_pagelist(struct nfs_pgio_data *wdata, 
int how); +extern int objio_read_pagelist(struct nfs_pgio_header *rdata); +extern int objio_write_pagelist(struct nfs_pgio_header *wdata, int how); /* * callback API @@ -168,10 +168,10 @@ extern struct pnfs_layout_segment *objlayout_alloc_lseg( extern void objlayout_free_lseg(struct pnfs_layout_segment *); extern enum pnfs_try_status objlayout_read_pagelist( - struct nfs_pgio_data *); + struct nfs_pgio_header *); extern enum pnfs_try_status objlayout_write_pagelist( - struct nfs_pgio_data *, + struct nfs_pgio_header *, int how); extern void objlayout_encode_layoutcommit( diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 17fab89f6358..be7cbce6e4c7 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -115,8 +115,8 @@ __nfs_iocounter_wait(struct nfs_io_counter *c) set_bit(NFS_IO_INPROGRESS, &c->flags); if (atomic_read(&c->io_count) == 0) break; - ret = nfs_wait_bit_killable(&c->flags); - } while (atomic_read(&c->io_count) != 0); + ret = nfs_wait_bit_killable(&q.key); + } while (atomic_read(&c->io_count) != 0 && !ret); finish_wait(wq, &q.wait); return ret; } @@ -136,28 +136,52 @@ nfs_iocounter_wait(struct nfs_io_counter *c) return __nfs_iocounter_wait(c); } -static int nfs_wait_bit_uninterruptible(void *word) -{ - io_schedule(); - return 0; -} - /* * nfs_page_group_lock - lock the head of the page group * @req - request in group that is to be locked + * @nonblock - if true don't block waiting for lock * * this lock must be held if modifying the page group list + * + * return 0 on success, < 0 on error: -EDELAY if nonblocking or the + * result from wait_on_bit_lock + * + * NOTE: calling with nonblock=false should always have set the + * lock bit (see fs/buffer.c and other uses of wait_on_bit_lock + * with TASK_UNINTERRUPTIBLE), so there is no need to check the result. 
+ */ +int +nfs_page_group_lock(struct nfs_page *req, bool nonblock) +{ + struct nfs_page *head = req->wb_head; + + WARN_ON_ONCE(head != head->wb_head); + + if (!test_and_set_bit(PG_HEADLOCK, &head->wb_flags)) + return 0; + + if (!nonblock) + return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, + TASK_UNINTERRUPTIBLE); + + return -EAGAIN; +} + +/* + * nfs_page_group_lock_wait - wait for the lock to clear, but don't grab it + * @req - a request in the group + * + * This is a blocking call to wait for the group lock to be cleared. */ void -nfs_page_group_lock(struct nfs_page *req) +nfs_page_group_lock_wait(struct nfs_page *req) { struct nfs_page *head = req->wb_head; WARN_ON_ONCE(head != head->wb_head); - wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, - nfs_wait_bit_uninterruptible, - TASK_UNINTERRUPTIBLE); + wait_on_bit(&head->wb_flags, PG_HEADLOCK, + TASK_UNINTERRUPTIBLE); } /* @@ -218,7 +242,7 @@ bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit) { bool ret; - nfs_page_group_lock(req); + nfs_page_group_lock(req, false); ret = nfs_page_group_sync_on_bit_locked(req, bit); nfs_page_group_unlock(req); @@ -435,9 +459,8 @@ void nfs_release_request(struct nfs_page *req) int nfs_wait_on_request(struct nfs_page *req) { - return wait_on_bit(&req->wb_flags, PG_BUSY, - nfs_wait_bit_uninterruptible, - TASK_UNINTERRUPTIBLE); + return wait_on_bit_io(&req->wb_flags, PG_BUSY, + TASK_UNINTERRUPTIBLE); } /* @@ -462,123 +485,72 @@ size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, } EXPORT_SYMBOL_GPL(nfs_generic_pg_test); -static inline struct nfs_rw_header *NFS_RW_HEADER(struct nfs_pgio_header *hdr) +struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *ops) { - return container_of(hdr, struct nfs_rw_header, header); -} - -/** - * nfs_rw_header_alloc - Allocate a header for a read or write - * @ops: Read or write function vector - */ -struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *ops) -{ - struct nfs_rw_header 
*header = ops->rw_alloc_header(); - - if (header) { - struct nfs_pgio_header *hdr = &header->header; + struct nfs_pgio_header *hdr = ops->rw_alloc_header(); + if (hdr) { INIT_LIST_HEAD(&hdr->pages); spin_lock_init(&hdr->lock); - atomic_set(&hdr->refcnt, 0); hdr->rw_ops = ops; } - return header; + return hdr; } -EXPORT_SYMBOL_GPL(nfs_rw_header_alloc); +EXPORT_SYMBOL_GPL(nfs_pgio_header_alloc); /* - * nfs_rw_header_free - Free a read or write header + * nfs_pgio_header_free - Free a read or write header * @hdr: The header to free */ -void nfs_rw_header_free(struct nfs_pgio_header *hdr) +void nfs_pgio_header_free(struct nfs_pgio_header *hdr) { - hdr->rw_ops->rw_free_header(NFS_RW_HEADER(hdr)); + hdr->rw_ops->rw_free_header(hdr); } -EXPORT_SYMBOL_GPL(nfs_rw_header_free); +EXPORT_SYMBOL_GPL(nfs_pgio_header_free); /** - * nfs_pgio_data_alloc - Allocate pageio data - * @hdr: The header making a request - * @pagecount: Number of pages to create - */ -static struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *hdr, - unsigned int pagecount) -{ - struct nfs_pgio_data *data, *prealloc; - - prealloc = &NFS_RW_HEADER(hdr)->rpc_data; - if (prealloc->header == NULL) - data = prealloc; - else - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (!data) - goto out; - - if (nfs_pgarray_set(&data->pages, pagecount)) { - data->header = hdr; - atomic_inc(&hdr->refcnt); - } else { - if (data != prealloc) - kfree(data); - data = NULL; - } -out: - return data; -} - -/** - * nfs_pgio_data_release - Properly free pageio data - * @data: The data to release + * nfs_pgio_data_destroy - make @hdr suitable for reuse + * + * Frees memory and releases refs from nfs_generic_pgio, so that it may + * be called again. 
+ * + * @hdr: A header that has had nfs_generic_pgio called */ -void nfs_pgio_data_release(struct nfs_pgio_data *data) +void nfs_pgio_data_destroy(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - struct nfs_rw_header *pageio_header = NFS_RW_HEADER(hdr); - - put_nfs_open_context(data->args.context); - if (data->pages.pagevec != data->pages.page_array) - kfree(data->pages.pagevec); - if (data == &pageio_header->rpc_data) { - data->header = NULL; - data = NULL; - } - if (atomic_dec_and_test(&hdr->refcnt)) - hdr->completion_ops->completion(hdr); - /* Note: we only free the rpc_task after callbacks are done. - * See the comment in rpc_free_task() for why - */ - kfree(data); + put_nfs_open_context(hdr->args.context); + if (hdr->page_array.pagevec != hdr->page_array.page_array) + kfree(hdr->page_array.pagevec); } -EXPORT_SYMBOL_GPL(nfs_pgio_data_release); +EXPORT_SYMBOL_GPL(nfs_pgio_data_destroy); /** * nfs_pgio_rpcsetup - Set up arguments for a pageio call - * @data: The pageio data + * @hdr: The pageio hdr * @count: Number of bytes to read * @offset: Initial offset * @how: How to commit data (writes only) * @cinfo: Commit information for the call (writes only) */ -static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data, +static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr, unsigned int count, unsigned int offset, int how, struct nfs_commit_info *cinfo) { - struct nfs_page *req = data->header->req; + struct nfs_page *req = hdr->req; /* Set up the RPC argument and reply structs - * NB: take care not to mess about with data->commit et al. */ + * NB: take care not to mess about with hdr->commit et al. 
*/ - data->args.fh = NFS_FH(data->header->inode); - data->args.offset = req_offset(req) + offset; + hdr->args.fh = NFS_FH(hdr->inode); + hdr->args.offset = req_offset(req) + offset; /* pnfs_set_layoutcommit needs this */ - data->mds_offset = data->args.offset; - data->args.pgbase = req->wb_pgbase + offset; - data->args.pages = data->pages.pagevec; - data->args.count = count; - data->args.context = get_nfs_open_context(req->wb_context); - data->args.lock_context = req->wb_lock_context; - data->args.stable = NFS_UNSTABLE; + hdr->mds_offset = hdr->args.offset; + hdr->args.pgbase = req->wb_pgbase + offset; + hdr->args.pages = hdr->page_array.pagevec; + hdr->args.count = count; + hdr->args.context = get_nfs_open_context(req->wb_context); + hdr->args.lock_context = req->wb_lock_context; + hdr->args.stable = NFS_UNSTABLE; switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { case 0: break; @@ -586,59 +558,59 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data, if (nfs_reqs_to_commit(cinfo)) break; default: - data->args.stable = NFS_FILE_SYNC; + hdr->args.stable = NFS_FILE_SYNC; } - data->res.fattr = &data->fattr; - data->res.count = count; - data->res.eof = 0; - data->res.verf = &data->verf; - nfs_fattr_init(&data->fattr); + hdr->res.fattr = &hdr->fattr; + hdr->res.count = count; + hdr->res.eof = 0; + hdr->res.verf = &hdr->verf; + nfs_fattr_init(&hdr->fattr); } /** - * nfs_pgio_prepare - Prepare pageio data to go over the wire + * nfs_pgio_prepare - Prepare pageio hdr to go over the wire * @task: The current task - * @calldata: pageio data to prepare + * @calldata: pageio header to prepare */ static void nfs_pgio_prepare(struct rpc_task *task, void *calldata) { - struct nfs_pgio_data *data = calldata; + struct nfs_pgio_header *hdr = calldata; int err; - err = NFS_PROTO(data->header->inode)->pgio_rpc_prepare(task, data); + err = NFS_PROTO(hdr->inode)->pgio_rpc_prepare(task, hdr); if (err) rpc_exit(task, err); } -int nfs_initiate_pgio(struct rpc_clnt *clnt, struct 
nfs_pgio_data *data, +int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, const struct rpc_call_ops *call_ops, int how, int flags) { struct rpc_task *task; struct rpc_message msg = { - .rpc_argp = &data->args, - .rpc_resp = &data->res, - .rpc_cred = data->header->cred, + .rpc_argp = &hdr->args, + .rpc_resp = &hdr->res, + .rpc_cred = hdr->cred, }; struct rpc_task_setup task_setup_data = { .rpc_client = clnt, - .task = &data->task, + .task = &hdr->task, .rpc_message = &msg, .callback_ops = call_ops, - .callback_data = data, + .callback_data = hdr, .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC | flags, }; int ret = 0; - data->header->rw_ops->rw_initiate(data, &msg, &task_setup_data, how); + hdr->rw_ops->rw_initiate(hdr, &msg, &task_setup_data, how); dprintk("NFS: %5u initiated pgio call " "(req %s/%llu, %u bytes @ offset %llu)\n", - data->task.tk_pid, - data->header->inode->i_sb->s_id, - (unsigned long long)NFS_FILEID(data->header->inode), - data->args.count, - (unsigned long long)data->args.offset); + hdr->task.tk_pid, + hdr->inode->i_sb->s_id, + (unsigned long long)NFS_FILEID(hdr->inode), + hdr->args.count, + (unsigned long long)hdr->args.offset); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) { @@ -665,22 +637,23 @@ static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { set_bit(NFS_IOHDR_REDO, &hdr->flags); - nfs_pgio_data_release(hdr->data); - hdr->data = NULL; + nfs_pgio_data_destroy(hdr); + hdr->completion_ops->completion(hdr); desc->pg_completion_ops->error_cleanup(&desc->pg_list); return -ENOMEM; } /** * nfs_pgio_release - Release pageio data - * @calldata: The pageio data to release + * @calldata: The pageio header to release */ static void nfs_pgio_release(void *calldata) { - struct nfs_pgio_data *data = calldata; - if (data->header->rw_ops->rw_release) - data->header->rw_ops->rw_release(data); - nfs_pgio_data_release(data); + struct nfs_pgio_header *hdr = calldata; + if 
(hdr->rw_ops->rw_release) + hdr->rw_ops->rw_release(hdr); + nfs_pgio_data_destroy(hdr); + hdr->completion_ops->completion(hdr); } /** @@ -721,22 +694,22 @@ EXPORT_SYMBOL_GPL(nfs_pageio_init); /** * nfs_pgio_result - Basic pageio error handling * @task: The task that ran - * @calldata: Pageio data to check + * @calldata: Pageio header to check */ static void nfs_pgio_result(struct rpc_task *task, void *calldata) { - struct nfs_pgio_data *data = calldata; - struct inode *inode = data->header->inode; + struct nfs_pgio_header *hdr = calldata; + struct inode *inode = hdr->inode; dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid, task->tk_status); - if (data->header->rw_ops->rw_done(task, data, inode) != 0) + if (hdr->rw_ops->rw_done(task, hdr, inode) != 0) return; if (task->tk_status < 0) - nfs_set_pgio_error(data->header, task->tk_status, data->args.offset); + nfs_set_pgio_error(hdr, task->tk_status, hdr->args.offset); else - data->header->rw_ops->rw_result(task, data); + hdr->rw_ops->rw_result(task, hdr); } /* @@ -751,32 +724,42 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { struct nfs_page *req; - struct page **pages; - struct nfs_pgio_data *data; + struct page **pages, + *last_page; struct list_head *head = &desc->pg_list; struct nfs_commit_info cinfo; + unsigned int pagecount, pageused; - data = nfs_pgio_data_alloc(hdr, nfs_page_array_len(desc->pg_base, - desc->pg_count)); - if (!data) + pagecount = nfs_page_array_len(desc->pg_base, desc->pg_count); + if (!nfs_pgarray_set(&hdr->page_array, pagecount)) return nfs_pgio_error(desc, hdr); nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); - pages = data->pages.pagevec; + pages = hdr->page_array.pagevec; + last_page = NULL; + pageused = 0; while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_list_add_request(req, &hdr->pages); - *pages++ = req->wb_page; + + if (WARN_ON_ONCE(pageused >= pagecount)) + return 
nfs_pgio_error(desc, hdr); + + if (!last_page || last_page != req->wb_page) { + *pages++ = last_page = req->wb_page; + pageused++; + } } + if (WARN_ON_ONCE(pageused != pagecount)) + return nfs_pgio_error(desc, hdr); if ((desc->pg_ioflags & FLUSH_COND_STABLE) && (desc->pg_moreio || nfs_reqs_to_commit(&cinfo))) desc->pg_ioflags &= ~FLUSH_COND_STABLE; /* Set up the argument struct */ - nfs_pgio_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo); - hdr->data = data; + nfs_pgio_rpcsetup(hdr, desc->pg_count, 0, desc->pg_ioflags, &cinfo); desc->pg_rpc_callops = &nfs_pgio_common_ops; return 0; } @@ -784,25 +767,20 @@ EXPORT_SYMBOL_GPL(nfs_generic_pgio); static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) { - struct nfs_rw_header *rw_hdr; struct nfs_pgio_header *hdr; int ret; - rw_hdr = nfs_rw_header_alloc(desc->pg_rw_ops); - if (!rw_hdr) { + hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); + if (!hdr) { desc->pg_completion_ops->error_cleanup(&desc->pg_list); return -ENOMEM; } - hdr = &rw_hdr->header; - nfs_pgheader_init(desc, hdr, nfs_rw_header_free); - atomic_inc(&hdr->refcnt); + nfs_pgheader_init(desc, hdr, nfs_pgio_header_free); ret = nfs_generic_pgio(desc, hdr); if (ret == 0) ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode), - hdr->data, desc->pg_rpc_callops, + hdr, desc->pg_rpc_callops, desc->pg_ioflags, 0); - if (atomic_dec_and_test(&hdr->refcnt)) - hdr->completion_ops->completion(hdr); return ret; } @@ -845,6 +823,14 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, return false; if (req_offset(req) != req_offset(prev) + prev->wb_bytes) return false; + if (req->wb_page == prev->wb_page) { + if (req->wb_pgbase != prev->wb_pgbase + prev->wb_bytes) + return false; + } else { + if (req->wb_pgbase != 0 || + prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) + return false; + } } size = pgio->pg_ops->pg_test(pgio, prev, req); WARN_ON_ONCE(size > req->wb_bytes); @@ -916,7 +902,7 @@ static int __nfs_pageio_add_request(struct 
nfs_pageio_descriptor *desc, unsigned int bytes_left = 0; unsigned int offset, pgbase; - nfs_page_group_lock(req); + nfs_page_group_lock(req, false); subreq = req; bytes_left = subreq->wb_bytes; @@ -938,7 +924,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, if (desc->pg_recoalesce) return 0; /* retry add_request for this subreq */ - nfs_page_group_lock(req); + nfs_page_group_lock(req, false); continue; } @@ -1013,7 +999,38 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, } while (ret); return ret; } -EXPORT_SYMBOL_GPL(nfs_pageio_add_request); + +/* + * nfs_pageio_resend - Transfer requests to new descriptor and resend + * @hdr - the pgio header to move request from + * @desc - the pageio descriptor to add requests to + * + * Try to move each request (nfs_page) from @hdr to @desc then attempt + * to send them. + * + * Returns 0 on success and < 0 on error. + */ +int nfs_pageio_resend(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr) +{ + LIST_HEAD(failed); + + desc->pg_dreq = hdr->dreq; + while (!list_empty(&hdr->pages)) { + struct nfs_page *req = nfs_list_entry(hdr->pages.next); + + nfs_list_remove_request(req); + if (!nfs_pageio_add_request(desc, req)) + nfs_list_add_request(req, &failed); + } + nfs_pageio_complete(desc); + if (!list_empty(&failed)) { + list_move(&failed, &hdr->pages); + return -EIO; + } + return 0; +} +EXPORT_SYMBOL_GPL(nfs_pageio_resend); /** * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor @@ -1029,7 +1046,6 @@ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) break; } } -EXPORT_SYMBOL_GPL(nfs_pageio_complete); /** * nfs_pageio_cond_complete - Conditional I/O completion diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 6fdcd233d6f7..a3851debf8a2 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -361,6 +361,23 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg) } EXPORT_SYMBOL_GPL(pnfs_put_lseg); +static void pnfs_put_lseg_async_work(struct work_struct *work) 
+{ + struct pnfs_layout_segment *lseg; + + lseg = container_of(work, struct pnfs_layout_segment, pls_work); + + pnfs_put_lseg(lseg); +} + +void +pnfs_put_lseg_async(struct pnfs_layout_segment *lseg) +{ + INIT_WORK(&lseg->pls_work, pnfs_put_lseg_async_work); + schedule_work(&lseg->pls_work); +} +EXPORT_SYMBOL_GPL(pnfs_put_lseg_async); + static u64 end_offset(u64 start, u64 len) { @@ -1470,41 +1487,19 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, } EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); -int pnfs_write_done_resend_to_mds(struct inode *inode, - struct list_head *head, - const struct nfs_pgio_completion_ops *compl_ops, - struct nfs_direct_req *dreq) +int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr) { struct nfs_pageio_descriptor pgio; - LIST_HEAD(failed); /* Resend all requests through the MDS */ - nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, true, compl_ops); - pgio.pg_dreq = dreq; - while (!list_empty(head)) { - struct nfs_page *req = nfs_list_entry(head->next); - - nfs_list_remove_request(req); - if (!nfs_pageio_add_request(&pgio, req)) - nfs_list_add_request(req, &failed); - } - nfs_pageio_complete(&pgio); - - if (!list_empty(&failed)) { - /* For some reason our attempt to resend pages. Mark the - * overall send request as having failed, and let - * nfs_writeback_release_full deal with the error. 
- */ - list_move(&failed, head); - return -EIO; - } - return 0; + nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true, + hdr->completion_ops); + return nfs_pageio_resend(&pgio, hdr); } EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); -static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data) +static void pnfs_ld_handle_write_error(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; dprintk("pnfs write error = %d\n", hdr->pnfs_error); if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & @@ -1512,50 +1507,42 @@ static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data) pnfs_return_layout(hdr->inode); } if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) - data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode, - &hdr->pages, - hdr->completion_ops, - hdr->dreq); + hdr->task.tk_status = pnfs_write_done_resend_to_mds(hdr); } /* * Called by non rpc-based layout drivers */ -void pnfs_ld_write_done(struct nfs_pgio_data *data) +void pnfs_ld_write_done(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - - trace_nfs4_pnfs_write(data, hdr->pnfs_error); + trace_nfs4_pnfs_write(hdr, hdr->pnfs_error); if (!hdr->pnfs_error) { - pnfs_set_layoutcommit(data); - hdr->mds_ops->rpc_call_done(&data->task, data); + pnfs_set_layoutcommit(hdr); + hdr->mds_ops->rpc_call_done(&hdr->task, hdr); } else - pnfs_ld_handle_write_error(data); - hdr->mds_ops->rpc_release(data); + pnfs_ld_handle_write_error(hdr); + hdr->mds_ops->rpc_release(hdr); } EXPORT_SYMBOL_GPL(pnfs_ld_write_done); static void pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_data *data) + struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { list_splice_tail_init(&hdr->pages, &desc->pg_list); nfs_pageio_reset_write_mds(desc); desc->pg_recoalesce = 1; } - nfs_pgio_data_release(data); + nfs_pgio_data_destroy(hdr); } static enum pnfs_try_status 
-pnfs_try_to_write_data(struct nfs_pgio_data *wdata, +pnfs_try_to_write_data(struct nfs_pgio_header *hdr, const struct rpc_call_ops *call_ops, struct pnfs_layout_segment *lseg, int how) { - struct nfs_pgio_header *hdr = wdata->header; struct inode *inode = hdr->inode; enum pnfs_try_status trypnfs; struct nfs_server *nfss = NFS_SERVER(inode); @@ -1563,8 +1550,8 @@ pnfs_try_to_write_data(struct nfs_pgio_data *wdata, hdr->mds_ops = call_ops; dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, - inode->i_ino, wdata->args.count, wdata->args.offset, how); - trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how); + inode->i_ino, hdr->args.count, hdr->args.offset, how); + trypnfs = nfss->pnfs_curr_ld->write_pagelist(hdr, how); if (trypnfs != PNFS_NOT_ATTEMPTED) nfs_inc_stats(inode, NFSIOS_PNFS_WRITE); dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); @@ -1575,139 +1562,105 @@ static void pnfs_do_write(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr, int how) { - struct nfs_pgio_data *data = hdr->data; const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; struct pnfs_layout_segment *lseg = desc->pg_lseg; enum pnfs_try_status trypnfs; desc->pg_lseg = NULL; - trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); + trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how); if (trypnfs == PNFS_NOT_ATTEMPTED) - pnfs_write_through_mds(desc, data); + pnfs_write_through_mds(desc, hdr); pnfs_put_lseg(lseg); } static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) { pnfs_put_lseg(hdr->lseg); - nfs_rw_header_free(hdr); + nfs_pgio_header_free(hdr); } EXPORT_SYMBOL_GPL(pnfs_writehdr_free); int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) { - struct nfs_rw_header *whdr; struct nfs_pgio_header *hdr; int ret; - whdr = nfs_rw_header_alloc(desc->pg_rw_ops); - if (!whdr) { + hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); + if (!hdr) { desc->pg_completion_ops->error_cleanup(&desc->pg_list); pnfs_put_lseg(desc->pg_lseg); 
desc->pg_lseg = NULL; return -ENOMEM; } - hdr = &whdr->header; nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); hdr->lseg = pnfs_get_lseg(desc->pg_lseg); - atomic_inc(&hdr->refcnt); ret = nfs_generic_pgio(desc, hdr); if (ret != 0) { pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; } else pnfs_do_write(desc, hdr, desc->pg_ioflags); - if (atomic_dec_and_test(&hdr->refcnt)) - hdr->completion_ops->completion(hdr); return ret; } EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); -int pnfs_read_done_resend_to_mds(struct inode *inode, - struct list_head *head, - const struct nfs_pgio_completion_ops *compl_ops, - struct nfs_direct_req *dreq) +int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *hdr) { struct nfs_pageio_descriptor pgio; - LIST_HEAD(failed); /* Resend all requests through the MDS */ - nfs_pageio_init_read(&pgio, inode, true, compl_ops); - pgio.pg_dreq = dreq; - while (!list_empty(head)) { - struct nfs_page *req = nfs_list_entry(head->next); - - nfs_list_remove_request(req); - if (!nfs_pageio_add_request(&pgio, req)) - nfs_list_add_request(req, &failed); - } - nfs_pageio_complete(&pgio); - - if (!list_empty(&failed)) { - list_move(&failed, head); - return -EIO; - } - return 0; + nfs_pageio_init_read(&pgio, hdr->inode, true, hdr->completion_ops); + return nfs_pageio_resend(&pgio, hdr); } EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds); -static void pnfs_ld_handle_read_error(struct nfs_pgio_data *data) +static void pnfs_ld_handle_read_error(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - dprintk("pnfs read error = %d\n", hdr->pnfs_error); if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & PNFS_LAYOUTRET_ON_ERROR) { pnfs_return_layout(hdr->inode); } if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) - data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode, - &hdr->pages, - hdr->completion_ops, - hdr->dreq); + hdr->task.tk_status = pnfs_read_done_resend_to_mds(hdr); } /* * Called by non rpc-based layout drivers */ -void 
pnfs_ld_read_done(struct nfs_pgio_data *data) +void pnfs_ld_read_done(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - - trace_nfs4_pnfs_read(data, hdr->pnfs_error); + trace_nfs4_pnfs_read(hdr, hdr->pnfs_error); if (likely(!hdr->pnfs_error)) { - __nfs4_read_done_cb(data); - hdr->mds_ops->rpc_call_done(&data->task, data); + __nfs4_read_done_cb(hdr); + hdr->mds_ops->rpc_call_done(&hdr->task, hdr); } else - pnfs_ld_handle_read_error(data); - hdr->mds_ops->rpc_release(data); + pnfs_ld_handle_read_error(hdr); + hdr->mds_ops->rpc_release(hdr); } EXPORT_SYMBOL_GPL(pnfs_ld_read_done); static void pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_data *data) + struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { list_splice_tail_init(&hdr->pages, &desc->pg_list); nfs_pageio_reset_read_mds(desc); desc->pg_recoalesce = 1; } - nfs_pgio_data_release(data); + nfs_pgio_data_destroy(hdr); } /* * Call the appropriate parallel I/O subsystem read function. 
*/ static enum pnfs_try_status -pnfs_try_to_read_data(struct nfs_pgio_data *rdata, +pnfs_try_to_read_data(struct nfs_pgio_header *hdr, const struct rpc_call_ops *call_ops, struct pnfs_layout_segment *lseg) { - struct nfs_pgio_header *hdr = rdata->header; struct inode *inode = hdr->inode; struct nfs_server *nfss = NFS_SERVER(inode); enum pnfs_try_status trypnfs; @@ -1715,9 +1668,9 @@ pnfs_try_to_read_data(struct nfs_pgio_data *rdata, hdr->mds_ops = call_ops; dprintk("%s: Reading ino:%lu %u@%llu\n", - __func__, inode->i_ino, rdata->args.count, rdata->args.offset); + __func__, inode->i_ino, hdr->args.count, hdr->args.offset); - trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata); + trypnfs = nfss->pnfs_curr_ld->read_pagelist(hdr); if (trypnfs != PNFS_NOT_ATTEMPTED) nfs_inc_stats(inode, NFSIOS_PNFS_READ); dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); @@ -1727,52 +1680,46 @@ pnfs_try_to_read_data(struct nfs_pgio_data *rdata, static void pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { - struct nfs_pgio_data *data = hdr->data; const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; struct pnfs_layout_segment *lseg = desc->pg_lseg; enum pnfs_try_status trypnfs; desc->pg_lseg = NULL; - trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); + trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg); if (trypnfs == PNFS_NOT_ATTEMPTED) - pnfs_read_through_mds(desc, data); + pnfs_read_through_mds(desc, hdr); pnfs_put_lseg(lseg); } static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) { pnfs_put_lseg(hdr->lseg); - nfs_rw_header_free(hdr); + nfs_pgio_header_free(hdr); } EXPORT_SYMBOL_GPL(pnfs_readhdr_free); int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) { - struct nfs_rw_header *rhdr; struct nfs_pgio_header *hdr; int ret; - rhdr = nfs_rw_header_alloc(desc->pg_rw_ops); - if (!rhdr) { + hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); + if (!hdr) { desc->pg_completion_ops->error_cleanup(&desc->pg_list); ret = -ENOMEM; 
pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; return ret; } - hdr = &rhdr->header; nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); hdr->lseg = pnfs_get_lseg(desc->pg_lseg); - atomic_inc(&hdr->refcnt); ret = nfs_generic_pgio(desc, hdr); if (ret != 0) { pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; } else pnfs_do_read(desc, hdr); - if (atomic_dec_and_test(&hdr->refcnt)) - hdr->completion_ops->completion(hdr); return ret; } EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); @@ -1820,12 +1767,11 @@ void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); void -pnfs_set_layoutcommit(struct nfs_pgio_data *wdata) +pnfs_set_layoutcommit(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = wdata->header; struct inode *inode = hdr->inode; struct nfs_inode *nfsi = NFS_I(inode); - loff_t end_pos = wdata->mds_offset + wdata->res.count; + loff_t end_pos = hdr->mds_offset + hdr->res.count; bool mark_as_dirty = false; spin_lock(&inode->i_lock); @@ -1885,7 +1831,7 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) { if (!sync) goto out; - status = wait_on_bit_lock(&nfsi->flags, + status = wait_on_bit_lock_action(&nfsi->flags, NFS_INO_LAYOUTCOMMITTING, nfs_wait_bit_killable, TASK_KILLABLE); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 4fb309a2b4c4..aca3dff5dae6 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -32,6 +32,7 @@ #include <linux/nfs_fs.h> #include <linux/nfs_page.h> +#include <linux/workqueue.h> enum { NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */ @@ -46,6 +47,7 @@ struct pnfs_layout_segment { atomic_t pls_refcount; unsigned long pls_flags; struct pnfs_layout_hdr *pls_layout; + struct work_struct pls_work; }; enum pnfs_try_status { @@ -104,6 +106,8 @@ struct pnfs_layoutdriver_type { int max); void (*recover_commit_reqs) (struct list_head *list, struct nfs_commit_info *cinfo); + struct nfs_page * (*search_commit_reqs)(struct 
nfs_commit_info *cinfo, + struct page *page); int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how, @@ -113,8 +117,8 @@ struct pnfs_layoutdriver_type { * Return PNFS_ATTEMPTED to indicate the layout code has attempted * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS */ - enum pnfs_try_status (*read_pagelist) (struct nfs_pgio_data *nfs_data); - enum pnfs_try_status (*write_pagelist) (struct nfs_pgio_data *nfs_data, int how); + enum pnfs_try_status (*read_pagelist)(struct nfs_pgio_header *); + enum pnfs_try_status (*write_pagelist)(struct nfs_pgio_header *, int); void (*free_deviceid_node) (struct nfs4_deviceid_node *); @@ -179,6 +183,7 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); /* pnfs.c */ void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); void pnfs_put_lseg(struct pnfs_layout_segment *lseg); +void pnfs_put_lseg_async(struct pnfs_layout_segment *lseg); void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); void unset_pnfs_layoutdriver(struct nfs_server *); @@ -213,13 +218,13 @@ bool pnfs_roc(struct inode *ino); void pnfs_roc_release(struct inode *ino); void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); -void pnfs_set_layoutcommit(struct nfs_pgio_data *wdata); +void pnfs_set_layoutcommit(struct nfs_pgio_header *); void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); int pnfs_layoutcommit_inode(struct inode *inode, bool sync); int _pnfs_return_layout(struct inode *); int pnfs_commit_and_return_layout(struct inode *); -void pnfs_ld_write_done(struct nfs_pgio_data *); -void pnfs_ld_read_done(struct nfs_pgio_data *); +void pnfs_ld_write_done(struct nfs_pgio_header *); +void pnfs_ld_read_done(struct nfs_pgio_header *); struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, loff_t pos, @@ -228,12 +233,8 @@ struct pnfs_layout_segment 
*pnfs_update_layout(struct inode *ino, gfp_t gfp_flags); void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp); -int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head, - const struct nfs_pgio_completion_ops *compl_ops, - struct nfs_direct_req *dreq); -int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head, - const struct nfs_pgio_completion_ops *compl_ops, - struct nfs_direct_req *dreq); +int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *); +int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *); struct nfs4_threshold *pnfs_mdsthreshold_alloc(void); /* nfs4_deviceid_flags */ @@ -345,6 +346,17 @@ pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list, NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo); } +static inline struct nfs_page * +pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo, + struct page *page) +{ + struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; + + if (ld == NULL || ld->search_commit_reqs == NULL) + return NULL; + return ld->search_commit_reqs(cinfo, page); +} + /* Should the pNFS client commit and return the layout upon a setattr */ static inline bool pnfs_ld_layoutret_on_setattr(struct inode *inode) @@ -410,6 +422,10 @@ static inline void pnfs_put_lseg(struct pnfs_layout_segment *lseg) { } +static inline void pnfs_put_lseg_async(struct pnfs_layout_segment *lseg) +{ +} + static inline int pnfs_return_layout(struct inode *ino) { return 0; @@ -496,6 +512,13 @@ pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list, { } +static inline struct nfs_page * +pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo, + struct page *page) +{ + return NULL; +} + static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) { return 0; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index c171ce1a8a30..b09cc23d6f43 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -578,46 +578,49 @@ 
nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return 0; } -static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { - struct inode *inode = data->header->inode; + struct inode *inode = hdr->inode; nfs_invalidate_atime(inode); if (task->tk_status >= 0) { - nfs_refresh_inode(inode, data->res.fattr); + nfs_refresh_inode(inode, hdr->res.fattr); /* Emulate the eof flag, which isn't normally needed in NFSv2 * as it is guaranteed to always return the file attributes */ - if (data->args.offset + data->res.count >= data->res.fattr->size) - data->res.eof = 1; + if (hdr->args.offset + hdr->res.count >= hdr->res.fattr->size) + hdr->res.eof = 1; } return 0; } -static void nfs_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) +static void nfs_proc_read_setup(struct nfs_pgio_header *hdr, + struct rpc_message *msg) { msg->rpc_proc = &nfs_procedures[NFSPROC_READ]; } -static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task, + struct nfs_pgio_header *hdr) { rpc_call_start(task); return 0; } -static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { - struct inode *inode = data->header->inode; + struct inode *inode = hdr->inode; if (task->tk_status >= 0) - nfs_post_op_update_inode_force_wcc(inode, data->res.fattr); + nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr); return 0; } -static void nfs_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) +static void nfs_proc_write_setup(struct nfs_pgio_header *hdr, + struct rpc_message *msg) { /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */ - data->args.stable = NFS_FILE_SYNC; + hdr->args.stable = NFS_FILE_SYNC; msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE]; } diff --git 
a/fs/nfs/read.c b/fs/nfs/read.c index e818a475ca64..beff2769c5c5 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -33,12 +33,12 @@ static const struct nfs_rw_ops nfs_rw_read_ops; static struct kmem_cache *nfs_rdata_cachep; -static struct nfs_rw_header *nfs_readhdr_alloc(void) +static struct nfs_pgio_header *nfs_readhdr_alloc(void) { return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); } -static void nfs_readhdr_free(struct nfs_rw_header *rhdr) +static void nfs_readhdr_free(struct nfs_pgio_header *rhdr) { kmem_cache_free(nfs_rdata_cachep, rhdr); } @@ -115,12 +115,6 @@ static void nfs_readpage_release(struct nfs_page *req) unlock_page(req->wb_page); } - - dprintk("NFS: read done (%s/%Lu %d@%Ld)\n", - req->wb_context->dentry->d_inode->i_sb->s_id, - (unsigned long long)NFS_FILEID(req->wb_context->dentry->d_inode), - req->wb_bytes, - (long long)req_offset(req)); nfs_release_request(req); } @@ -172,14 +166,15 @@ out: hdr->release(hdr); } -static void nfs_initiate_read(struct nfs_pgio_data *data, struct rpc_message *msg, +static void nfs_initiate_read(struct nfs_pgio_header *hdr, + struct rpc_message *msg, struct rpc_task_setup *task_setup_data, int how) { - struct inode *inode = data->header->inode; + struct inode *inode = hdr->inode; int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; task_setup_data->flags |= swap_flags; - NFS_PROTO(inode)->read_setup(data, msg); + NFS_PROTO(inode)->read_setup(hdr, msg); } static void @@ -203,14 +198,15 @@ static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = { * This is the callback from RPC telling us whether a reply was * received or some error occurred (timeout or socket shutdown). 
*/ -static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data, +static int nfs_readpage_done(struct rpc_task *task, + struct nfs_pgio_header *hdr, struct inode *inode) { - int status = NFS_PROTO(inode)->read_done(task, data); + int status = NFS_PROTO(inode)->read_done(task, hdr); if (status != 0) return status; - nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, data->res.count); + nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, hdr->res.count); if (task->tk_status == -ESTALE) { set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); @@ -219,34 +215,34 @@ static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data, return 0; } -static void nfs_readpage_retry(struct rpc_task *task, struct nfs_pgio_data *data) +static void nfs_readpage_retry(struct rpc_task *task, + struct nfs_pgio_header *hdr) { - struct nfs_pgio_args *argp = &data->args; - struct nfs_pgio_res *resp = &data->res; + struct nfs_pgio_args *argp = &hdr->args; + struct nfs_pgio_res *resp = &hdr->res; /* This is a short read! */ - nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD); + nfs_inc_stats(hdr->inode, NFSIOS_SHORTREAD); /* Has the server at least made some progress? 
*/ if (resp->count == 0) { - nfs_set_pgio_error(data->header, -EIO, argp->offset); + nfs_set_pgio_error(hdr, -EIO, argp->offset); return; } - /* Yes, so retry the read at the end of the data */ - data->mds_offset += resp->count; + /* Yes, so retry the read at the end of the hdr */ + hdr->mds_offset += resp->count; argp->offset += resp->count; argp->pgbase += resp->count; argp->count -= resp->count; rpc_restart_call_prepare(task); } -static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *data) +static void nfs_readpage_result(struct rpc_task *task, + struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - - if (data->res.eof) { + if (hdr->res.eof) { loff_t bound; - bound = data->args.offset + data->res.count; + bound = hdr->args.offset + hdr->res.count; spin_lock(&hdr->lock); if (bound < hdr->io_start + hdr->good_bytes) { set_bit(NFS_IOHDR_EOF, &hdr->flags); @@ -254,8 +250,8 @@ static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *dat hdr->good_bytes = bound - hdr->io_start; } spin_unlock(&hdr->lock); - } else if (data->res.count != data->args.count) - nfs_readpage_retry(task, data); + } else if (hdr->res.count != hdr->args.count) + nfs_readpage_retry(task, hdr); } /* @@ -404,7 +400,7 @@ out: int __init nfs_init_readpagecache(void) { nfs_rdata_cachep = kmem_cache_create("nfs_read_data", - sizeof(struct nfs_rw_header), + sizeof(struct nfs_pgio_header), 0, SLAB_HWCACHE_ALIGN, NULL); if (nfs_rdata_cachep == NULL) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 084af1060d79..e4499d5b51e8 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1027,8 +1027,7 @@ static bool nfs_auth_info_add(struct nfs_auth_info *auth_info, rpc_authflavor_t flavor) { unsigned int i; - unsigned int max_flavor_len = (sizeof(auth_info->flavors) / - sizeof(auth_info->flavors[0])); + unsigned int max_flavor_len = ARRAY_SIZE(auth_info->flavors); /* make sure this flavor isn't already in the list */ for (i = 0; i < 
auth_info->flavor_len; i++) { @@ -2180,7 +2179,7 @@ out_no_address: return -EINVAL; } -#define NFS_MOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \ +#define NFS_REMOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \ | NFS_MOUNT_SECURE \ | NFS_MOUNT_TCP \ | NFS_MOUNT_VER3 \ @@ -2188,15 +2187,16 @@ out_no_address: | NFS_MOUNT_NONLM \ | NFS_MOUNT_BROKEN_SUID \ | NFS_MOUNT_STRICTLOCK \ - | NFS_MOUNT_UNSHARED \ - | NFS_MOUNT_NORESVPORT \ | NFS_MOUNT_LEGACY_INTERFACE) +#define NFS_MOUNT_CMP_FLAGMASK (NFS_REMOUNT_CMP_FLAGMASK & \ + ~(NFS_MOUNT_UNSHARED | NFS_MOUNT_NORESVPORT)) + static int nfs_compare_remount_data(struct nfs_server *nfss, struct nfs_parsed_mount_data *data) { - if ((data->flags ^ nfss->flags) & NFS_MOUNT_CMP_FLAGMASK || + if ((data->flags ^ nfss->flags) & NFS_REMOUNT_CMP_FLAGMASK || data->rsize != nfss->rsize || data->wsize != nfss->wsize || data->version != nfss->nfs_client->rpc_ops->version || diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 5e2f10304548..175d5d073ccf 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -47,6 +47,8 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops; static const struct nfs_commit_completion_ops nfs_commit_completion_ops; static const struct nfs_rw_ops nfs_rw_write_ops; static void nfs_clear_request_commit(struct nfs_page *req); +static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, + struct inode *inode); static struct kmem_cache *nfs_wdata_cachep; static mempool_t *nfs_wdata_mempool; @@ -71,18 +73,18 @@ void nfs_commit_free(struct nfs_commit_data *p) } EXPORT_SYMBOL_GPL(nfs_commit_free); -static struct nfs_rw_header *nfs_writehdr_alloc(void) +static struct nfs_pgio_header *nfs_writehdr_alloc(void) { - struct nfs_rw_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); + struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); if (p) memset(p, 0, sizeof(*p)); return p; } -static void nfs_writehdr_free(struct nfs_rw_header *whdr) +static void nfs_writehdr_free(struct nfs_pgio_header 
*hdr) { - mempool_free(whdr, nfs_wdata_mempool); + mempool_free(hdr, nfs_wdata_mempool); } static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) @@ -93,6 +95,38 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) } /* + * nfs_page_search_commits_for_head_request_locked + * + * Search through commit lists on @inode for the head request for @page. + * Must be called while holding the inode (which is cinfo) lock. + * + * Returns the head request if found, or NULL if not found. + */ +static struct nfs_page * +nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi, + struct page *page) +{ + struct nfs_page *freq, *t; + struct nfs_commit_info cinfo; + struct inode *inode = &nfsi->vfs_inode; + + nfs_init_cinfo_from_inode(&cinfo, inode); + + /* search through pnfs commit lists */ + freq = pnfs_search_commit_reqs(inode, &cinfo, page); + if (freq) + return freq->wb_head; + + /* Linearly search the commit list for the correct request */ + list_for_each_entry_safe(freq, t, &cinfo.mds->list, wb_list) { + if (freq->wb_page == page) + return freq->wb_head; + } + + return NULL; +} + +/* * nfs_page_find_head_request_locked - find head request associated with @page * * must be called while holding the inode lock. 
@@ -106,21 +140,12 @@ nfs_page_find_head_request_locked(struct nfs_inode *nfsi, struct page *page) if (PagePrivate(page)) req = (struct nfs_page *)page_private(page); - else if (unlikely(PageSwapCache(page))) { - struct nfs_page *freq, *t; - - /* Linearly search the commit list for the correct req */ - list_for_each_entry_safe(freq, t, &nfsi->commit_info.list, wb_list) { - if (freq->wb_page == page) { - req = freq->wb_head; - break; - } - } - } + else if (unlikely(PageSwapCache(page))) + req = nfs_page_search_commits_for_head_request_locked(nfsi, + page); if (req) { WARN_ON_ONCE(req->wb_head != req); - kref_get(&req->wb_kref); } @@ -216,7 +241,7 @@ static bool nfs_page_group_covers_page(struct nfs_page *req) unsigned int pos = 0; unsigned int len = nfs_page_length(req->wb_page); - nfs_page_group_lock(req); + nfs_page_group_lock(req, false); do { tmp = nfs_page_group_search_locked(req->wb_head, pos); @@ -379,8 +404,6 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list, subreq->wb_head = subreq; subreq->wb_this_page = subreq; - nfs_clear_request_commit(subreq); - /* subreq is now totally disconnected from page group or any * write / commit lists. 
last chance to wake any waiters */ nfs_unlock_request(subreq); @@ -455,8 +478,23 @@ try_again: return NULL; } + /* holding inode lock, so always make a non-blocking call to try the + * page group lock */ + ret = nfs_page_group_lock(head, true); + if (ret < 0) { + spin_unlock(&inode->i_lock); + + if (!nonblock && ret == -EAGAIN) { + nfs_page_group_lock_wait(head); + nfs_release_request(head); + goto try_again; + } + + nfs_release_request(head); + return ERR_PTR(ret); + } + /* lock each request in the page group */ - nfs_page_group_lock(head); subreq = head; do { /* @@ -488,7 +526,7 @@ try_again: * Commit list removal accounting is done after locks are dropped */ subreq = head; do { - nfs_list_remove_request(subreq); + nfs_clear_request_commit(subreq); subreq = subreq->wb_this_page; } while (subreq != head); @@ -518,15 +556,11 @@ try_again: nfs_page_group_unlock(head); - /* drop lock to clear_request_commit the head req and clean up - * requests on destroy list */ + /* drop lock to clean uprequests on destroy list */ spin_unlock(&inode->i_lock); nfs_destroy_unlinked_subrequests(destroy_list, head); - /* clean up commit list state */ - nfs_clear_request_commit(head); - /* still holds ref on head from nfs_page_find_head_request_locked * and still has lock on head from lock loop */ return head; @@ -623,7 +657,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) int err; /* Stop dirtying of new pages while we sync */ - err = wait_on_bit_lock(bitlock, NFS_INO_FLUSHING, + err = wait_on_bit_lock_action(bitlock, NFS_INO_FLUSHING, nfs_wait_bit_killable, TASK_KILLABLE); if (err) goto out_err; @@ -705,6 +739,8 @@ static void nfs_inode_remove_request(struct nfs_page *req) if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) nfs_release_request(req); + else + WARN_ON_ONCE(1); } static void @@ -808,6 +844,7 @@ nfs_clear_page_commit(struct page *page) dec_bdi_stat(page_file_mapping(page)->backing_dev_info, BDI_RECLAIMABLE); } +/* Called holding 
inode (/cinfo) lock */ static void nfs_clear_request_commit(struct nfs_page *req) { @@ -817,20 +854,17 @@ nfs_clear_request_commit(struct nfs_page *req) nfs_init_cinfo_from_inode(&cinfo, inode); if (!pnfs_clear_request_commit(req, &cinfo)) { - spin_lock(cinfo.lock); nfs_request_remove_commit_list(req, &cinfo); - spin_unlock(cinfo.lock); } nfs_clear_page_commit(req->wb_page); } } -static inline -int nfs_write_need_commit(struct nfs_pgio_data *data) +int nfs_write_need_commit(struct nfs_pgio_header *hdr) { - if (data->verf.committed == NFS_DATA_SYNC) - return data->header->lseg == NULL; - return data->verf.committed != NFS_FILE_SYNC; + if (hdr->verf.committed == NFS_DATA_SYNC) + return hdr->lseg == NULL; + return hdr->verf.committed != NFS_FILE_SYNC; } #else @@ -856,8 +890,7 @@ nfs_clear_request_commit(struct nfs_page *req) { } -static inline -int nfs_write_need_commit(struct nfs_pgio_data *data) +int nfs_write_need_commit(struct nfs_pgio_header *hdr) { return 0; } @@ -883,11 +916,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) nfs_context_set_write_error(req->wb_context, hdr->error); goto remove_req; } - if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) { - nfs_mark_request_dirty(req); - goto next; - } - if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) { + if (nfs_write_need_commit(hdr)) { memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); nfs_mark_request_commit(req, hdr->lseg, &cinfo); goto next; @@ -1038,9 +1067,9 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, else req->wb_bytes = rqend - req->wb_offset; out_unlock: - spin_unlock(&inode->i_lock); if (req) nfs_clear_request_commit(req); + spin_unlock(&inode->i_lock); return req; out_flushme: spin_unlock(&inode->i_lock); @@ -1241,17 +1270,18 @@ static int flush_task_priority(int how) return RPC_PRIORITY_NORMAL; } -static void nfs_initiate_write(struct nfs_pgio_data *data, struct rpc_message *msg, +static void nfs_initiate_write(struct nfs_pgio_header 
*hdr, + struct rpc_message *msg, struct rpc_task_setup *task_setup_data, int how) { - struct inode *inode = data->header->inode; + struct inode *inode = hdr->inode; int priority = flush_task_priority(how); task_setup_data->priority = priority; - NFS_PROTO(inode)->write_setup(data, msg); + NFS_PROTO(inode)->write_setup(hdr, msg); nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client, - &task_setup_data->rpc_client, msg, data); + &task_setup_data->rpc_client, msg, hdr); } /* If a nfs_flush_* function fails, it should remove reqs from @head and @@ -1313,21 +1343,9 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata) NFS_PROTO(data->inode)->commit_rpc_prepare(task, data); } -static void nfs_writeback_release_common(struct nfs_pgio_data *data) +static void nfs_writeback_release_common(struct nfs_pgio_header *hdr) { - struct nfs_pgio_header *hdr = data->header; - int status = data->task.tk_status; - - if ((status >= 0) && nfs_write_need_commit(data)) { - spin_lock(&hdr->lock); - if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) - ; /* Do nothing */ - else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) - memcpy(&hdr->verf, &data->verf, sizeof(hdr->verf)); - else if (memcmp(&hdr->verf, &data->verf, sizeof(hdr->verf))) - set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags); - spin_unlock(&hdr->lock); - } + /* do nothing! */ } /* @@ -1358,7 +1376,8 @@ static int nfs_should_remove_suid(const struct inode *inode) /* * This function is called when the WRITE call is complete. */ -static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data, +static int nfs_writeback_done(struct rpc_task *task, + struct nfs_pgio_header *hdr, struct inode *inode) { int status; @@ -1370,13 +1389,14 @@ static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data, * another writer had changed the file, but some applications * depend on tighter cache coherency when writing. 
*/ - status = NFS_PROTO(inode)->write_done(task, data); + status = NFS_PROTO(inode)->write_done(task, hdr); if (status != 0) return status; - nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, data->res.count); + nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count); #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) - if (data->res.verf->committed < data->args.stable && task->tk_status >= 0) { + if (hdr->res.verf->committed < hdr->args.stable && + task->tk_status >= 0) { /* We tried a write call, but the server did not * commit data to stable storage even though we * requested it. @@ -1392,7 +1412,7 @@ static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data, dprintk("NFS: faulty NFS server %s:" " (committed = %d) != (stable = %d)\n", NFS_SERVER(inode)->nfs_client->cl_hostname, - data->res.verf->committed, data->args.stable); + hdr->res.verf->committed, hdr->args.stable); complain = jiffies + 300 * HZ; } } @@ -1407,16 +1427,17 @@ static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data, /* * This function is called when the WRITE call is complete. */ -static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *data) +static void nfs_writeback_result(struct rpc_task *task, + struct nfs_pgio_header *hdr) { - struct nfs_pgio_args *argp = &data->args; - struct nfs_pgio_res *resp = &data->res; + struct nfs_pgio_args *argp = &hdr->args; + struct nfs_pgio_res *resp = &hdr->res; if (resp->count < argp->count) { static unsigned long complain; /* This a short write! */ - nfs_inc_stats(data->header->inode, NFSIOS_SHORTWRITE); + nfs_inc_stats(hdr->inode, NFSIOS_SHORTWRITE); /* Has the server at least made some progress? 
*/ if (resp->count == 0) { @@ -1426,14 +1447,14 @@ static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *da argp->count); complain = jiffies + 300 * HZ; } - nfs_set_pgio_error(data->header, -EIO, argp->offset); + nfs_set_pgio_error(hdr, -EIO, argp->offset); task->tk_status = -EIO; return; } /* Was this an NFSv2 write or an NFSv3 stable write? */ if (resp->verf->committed != NFS_UNSTABLE) { /* Resend from where the server left off */ - data->mds_offset += resp->count; + hdr->mds_offset += resp->count; argp->offset += resp->count; argp->pgbase += resp->count; argp->count -= resp->count; @@ -1703,7 +1724,7 @@ int nfs_commit_inode(struct inode *inode, int how) return error; if (!may_wait) goto out_mark_dirty; - error = wait_on_bit(&NFS_I(inode)->flags, + error = wait_on_bit_action(&NFS_I(inode)->flags, NFS_INO_COMMIT, nfs_wait_bit_killable, TASK_KILLABLE); @@ -1884,7 +1905,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage, int __init nfs_init_writepagecache(void) { nfs_wdata_cachep = kmem_cache_create("nfs_write_data", - sizeof(struct nfs_rw_header), + sizeof(struct nfs_pgio_header), 0, SLAB_HWCACHE_ALIGN, NULL); if (nfs_wdata_cachep == NULL) diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c index ed628f71274c..538f142935ea 100644 --- a/fs/nfs_common/nfsacl.c +++ b/fs/nfs_common/nfsacl.c @@ -30,9 +30,6 @@ MODULE_LICENSE("GPL"); -EXPORT_SYMBOL_GPL(nfsacl_encode); -EXPORT_SYMBOL_GPL(nfsacl_decode); - struct nfsacl_encode_desc { struct xdr_array2_desc desc; unsigned int count; @@ -136,6 +133,7 @@ int nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, nfsacl_desc.desc.array_len; return err; } +EXPORT_SYMBOL_GPL(nfsacl_encode); struct nfsacl_decode_desc { struct xdr_array2_desc desc; @@ -295,3 +293,4 @@ int nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt, return 8 + nfsacl_desc.desc.elem_size * nfsacl_desc.desc.array_len; } +EXPORT_SYMBOL_GPL(nfsacl_decode); 
diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h index a986ceb6fd0d..4cd7c69a6cb9 100644 --- a/fs/nfsd/acl.h +++ b/fs/nfsd/acl.h @@ -47,7 +47,7 @@ struct svc_rqst; #define NFS4_ACL_MAX ((PAGE_SIZE - sizeof(struct nfs4_acl)) \ / sizeof(struct nfs4_ace)) -struct nfs4_acl *nfs4_acl_new(int); +int nfs4_acl_bytes(int entries); int nfs4_acl_get_whotype(char *, u32); __be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who); diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 72f44823adbb..9d46a0bdd9f9 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -28,7 +28,7 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) validate_process_creds(); /* discard any old override before preparing the new set */ - revert_creds(get_cred(current->real_cred)); + revert_creds(get_cred(current_real_cred())); new = prepare_creds(); if (!new) return -ENOMEM; diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 13b85f94d9e2..72ffd7cce3c3 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -698,8 +698,8 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem) kref_get(&item->ex_client->ref); new->ex_client = item->ex_client; - new->ex_path.dentry = dget(item->ex_path.dentry); - new->ex_path.mnt = mntget(item->ex_path.mnt); + new->ex_path = item->ex_path; + path_get(&item->ex_path); new->ex_fslocs.locations = NULL; new->ex_fslocs.locations_count = 0; new->ex_fslocs.migrated = 0; @@ -1253,7 +1253,7 @@ static int e_show(struct seq_file *m, void *p) return 0; } - cache_get(&exp->h); + exp_get(exp); if (cache_check(cd, &exp->h, NULL)) return 0; exp_put(exp); diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h index cfeea85c5bed..04dc8c167b0c 100644 --- a/fs/nfsd/export.h +++ b/fs/nfsd/export.h @@ -101,9 +101,10 @@ static inline void exp_put(struct svc_export *exp) cache_put(&exp->h, exp->cd); } -static inline void exp_get(struct svc_export *exp) +static inline struct svc_export *exp_get(struct svc_export *exp) { cache_get(&exp->h); + return exp; } struct 
svc_export * rqst_exp_find(struct svc_rqst *, int, u32 *); diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c index 2ed05c3cd43d..c16bf5af6831 100644 --- a/fs/nfsd/fault_inject.c +++ b/fs/nfsd/fault_inject.c @@ -17,81 +17,13 @@ struct nfsd_fault_inject_op { char *file; - u64 (*forget)(struct nfs4_client *, u64); - u64 (*print)(struct nfs4_client *, u64); + u64 (*get)(void); + u64 (*set_val)(u64); + u64 (*set_clnt)(struct sockaddr_storage *, size_t); }; -static struct nfsd_fault_inject_op inject_ops[] = { - { - .file = "forget_clients", - .forget = nfsd_forget_client, - .print = nfsd_print_client, - }, - { - .file = "forget_locks", - .forget = nfsd_forget_client_locks, - .print = nfsd_print_client_locks, - }, - { - .file = "forget_openowners", - .forget = nfsd_forget_client_openowners, - .print = nfsd_print_client_openowners, - }, - { - .file = "forget_delegations", - .forget = nfsd_forget_client_delegations, - .print = nfsd_print_client_delegations, - }, - { - .file = "recall_delegations", - .forget = nfsd_recall_client_delegations, - .print = nfsd_print_client_delegations, - }, -}; - -static long int NUM_INJECT_OPS = sizeof(inject_ops) / sizeof(struct nfsd_fault_inject_op); static struct dentry *debug_dir; -static void nfsd_inject_set(struct nfsd_fault_inject_op *op, u64 val) -{ - u64 count = 0; - - if (val == 0) - printk(KERN_INFO "NFSD Fault Injection: %s (all)", op->file); - else - printk(KERN_INFO "NFSD Fault Injection: %s (n = %llu)", op->file, val); - - nfs4_lock_state(); - count = nfsd_for_n_state(val, op->forget); - nfs4_unlock_state(); - printk(KERN_INFO "NFSD: %s: found %llu", op->file, count); -} - -static void nfsd_inject_set_client(struct nfsd_fault_inject_op *op, - struct sockaddr_storage *addr, - size_t addr_size) -{ - char buf[INET6_ADDRSTRLEN]; - struct nfs4_client *clp; - u64 count; - - nfs4_lock_state(); - clp = nfsd_find_client(addr, addr_size); - if (clp) { - count = op->forget(clp, 0); - rpc_ntop((struct sockaddr *)&clp->cl_addr, 
buf, sizeof(buf)); - printk(KERN_INFO "NFSD [%s]: Client %s had %llu state object(s)\n", op->file, buf, count); - } - nfs4_unlock_state(); -} - -static void nfsd_inject_get(struct nfsd_fault_inject_op *op, u64 *val) -{ - nfs4_lock_state(); - *val = nfsd_for_n_state(0, op->print); - nfs4_unlock_state(); -} - static ssize_t fault_inject_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { @@ -99,9 +31,10 @@ static ssize_t fault_inject_read(struct file *file, char __user *buf, char read_buf[25]; size_t size; loff_t pos = *ppos; + struct nfsd_fault_inject_op *op = file_inode(file)->i_private; if (!pos) - nfsd_inject_get(file_inode(file)->i_private, &val); + val = op->get(); size = scnprintf(read_buf, sizeof(read_buf), "%llu\n", val); return simple_read_from_buffer(buf, len, ppos, read_buf, size); @@ -114,18 +47,36 @@ static ssize_t fault_inject_write(struct file *file, const char __user *buf, size_t size = min(sizeof(write_buf) - 1, len); struct net *net = current->nsproxy->net_ns; struct sockaddr_storage sa; + struct nfsd_fault_inject_op *op = file_inode(file)->i_private; u64 val; + char *nl; if (copy_from_user(write_buf, buf, size)) return -EFAULT; write_buf[size] = '\0'; + /* Deal with any embedded newlines in the string */ + nl = strchr(write_buf, '\n'); + if (nl) { + size = nl - write_buf; + *nl = '\0'; + } + size = rpc_pton(net, write_buf, size, (struct sockaddr *)&sa, sizeof(sa)); - if (size > 0) - nfsd_inject_set_client(file_inode(file)->i_private, &sa, size); - else { + if (size > 0) { + val = op->set_clnt(&sa, size); + if (val) + pr_info("NFSD [%s]: Client %s had %llu state object(s)\n", + op->file, write_buf, val); + } else { val = simple_strtoll(write_buf, NULL, 0); - nfsd_inject_set(file_inode(file)->i_private, val); + if (val == 0) + pr_info("NFSD Fault Injection: %s (all)", op->file); + else + pr_info("NFSD Fault Injection: %s (n = %llu)", + op->file, val); + val = op->set_val(val); + pr_info("NFSD: %s: found %llu", op->file, val); } 
return len; /* on success, claim we got the whole input */ } @@ -141,6 +92,41 @@ void nfsd_fault_inject_cleanup(void) debugfs_remove_recursive(debug_dir); } +static struct nfsd_fault_inject_op inject_ops[] = { + { + .file = "forget_clients", + .get = nfsd_inject_print_clients, + .set_val = nfsd_inject_forget_clients, + .set_clnt = nfsd_inject_forget_client, + }, + { + .file = "forget_locks", + .get = nfsd_inject_print_locks, + .set_val = nfsd_inject_forget_locks, + .set_clnt = nfsd_inject_forget_client_locks, + }, + { + .file = "forget_openowners", + .get = nfsd_inject_print_openowners, + .set_val = nfsd_inject_forget_openowners, + .set_clnt = nfsd_inject_forget_client_openowners, + }, + { + .file = "forget_delegations", + .get = nfsd_inject_print_delegations, + .set_val = nfsd_inject_forget_delegations, + .set_clnt = nfsd_inject_forget_client_delegations, + }, + { + .file = "recall_delegations", + .get = nfsd_inject_print_delegations, + .set_val = nfsd_inject_recall_delegations, + .set_clnt = nfsd_inject_recall_client_delegations, + }, +}; + +#define NUM_INJECT_OPS (sizeof(inject_ops)/sizeof(struct nfsd_fault_inject_op)) + int nfsd_fault_inject_init(void) { unsigned int i; diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index d32b3aa6600d..ea6749a32760 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -29,14 +29,19 @@ #define CLIENT_HASH_SIZE (1 << CLIENT_HASH_BITS) #define CLIENT_HASH_MASK (CLIENT_HASH_SIZE - 1) -#define LOCKOWNER_INO_HASH_BITS 8 -#define LOCKOWNER_INO_HASH_SIZE (1 << LOCKOWNER_INO_HASH_BITS) - #define SESSION_HASH_SIZE 512 struct cld_net; struct nfsd4_client_tracking_ops; +/* + * Represents a nfsd "container". With respect to nfsv4 state tracking, the + * fields of interest are the *_id_hashtbls and the *_name_tree. These track + * the nfs4_client objects by either short or long form clientid. + * + * Each nfsd_net runs a nfs4_laundromat workqueue job when necessary to clean + * up expired clients and delegations within the container. 
+ */ struct nfsd_net { struct cld_net *cld_net; @@ -66,8 +71,6 @@ struct nfsd_net { struct rb_root conf_name_tree; struct list_head *unconf_id_hashtbl; struct rb_root unconf_name_tree; - struct list_head *ownerstr_hashtbl; - struct list_head *lockowner_ino_hashtbl; struct list_head *sessionid_hashtbl; /* * client_lru holds client queue ordered by nfs4_client.cl_time @@ -97,10 +100,16 @@ struct nfsd_net { bool nfsd_net_up; bool lockd_up; + /* Time of server startup */ + struct timeval nfssvc_boot; + /* - * Time of server startup + * Max number of connections this nfsd container will allow. Defaults + * to '0' which is means that it bases this on the number of threads. */ - struct timeval nfssvc_boot; + unsigned int max_connections; + + u32 clientid_counter; struct svc_serv *nfsd_serv; }; diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 12b023a7ab7d..ac54ea60b3f6 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -54,14 +54,14 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp, if (resp->mask & (NFS_ACL|NFS_ACLCNT)) { acl = get_acl(inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl)) { - nfserr = nfserrno(PTR_ERR(acl)); - goto fail; - } if (acl == NULL) { /* Solaris returns the inode's minimum ACL. */ acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); } + if (IS_ERR(acl)) { + nfserr = nfserrno(PTR_ERR(acl)); + goto fail; + } resp->acl_access = acl; } if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) { diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 2a514e21dc74..34cbbab6abd7 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -47,14 +47,14 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp, if (resp->mask & (NFS_ACL|NFS_ACLCNT)) { acl = get_acl(inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl)) { - nfserr = nfserrno(PTR_ERR(acl)); - goto fail; - } if (acl == NULL) { /* Solaris returns the inode's minimum ACL. 
*/ acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); } + if (IS_ERR(acl)) { + nfserr = nfserrno(PTR_ERR(acl)); + goto fail; + } resp->acl_access = acl; } if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) { diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 401289913130..fa2525b2e9d7 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -157,11 +157,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp, * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof) * + 1 (xdr opaque byte count) = 26 */ - - resp->count = argp->count; - if (max_blocksize < resp->count) - resp->count = max_blocksize; - + resp->count = min(argp->count, max_blocksize); svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4); fh_copy(&resp->fh, &argp->fh); @@ -286,8 +282,7 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp, struct nfsd3_symlinkargs *argp, fh_copy(&resp->dirfh, &argp->ffh); fh_init(&resp->fh, NFS3_FHSIZE); nfserr = nfsd_symlink(rqstp, &resp->dirfh, argp->fname, argp->flen, - argp->tname, argp->tlen, - &resp->fh, &argp->attrs); + argp->tname, &resp->fh); RETURN_STATUS(nfserr); } diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index e6c01e80325e..39c5eb3ad33a 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -120,10 +120,7 @@ decode_sattr3(__be32 *p, struct iattr *iap) iap->ia_valid |= ATTR_SIZE; p = xdr_decode_hyper(p, &newsize); - if (newsize <= NFS_OFFSET_MAX) - iap->ia_size = newsize; - else - iap->ia_size = NFS_OFFSET_MAX; + iap->ia_size = min_t(u64, newsize, NFS_OFFSET_MAX); } if ((tmp = ntohl(*p++)) == 1) { /* set to server time */ iap->ia_valid |= ATTR_ATIME; @@ -338,10 +335,8 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, return 0; p = xdr_decode_hyper(p, &args->offset); - len = args->count = ntohl(*p++); - - if (len > max_blocksize) - len = max_blocksize; + args->count = ntohl(*p++); + len = min(args->count, max_blocksize); /* set up the kvec */ v=0; @@ -349,7 +344,7 @@ 
nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, struct page *p = *(rqstp->rq_next_page++); rqstp->rq_vec[v].iov_base = page_address(p); - rqstp->rq_vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE; + rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE); len -= rqstp->rq_vec[v].iov_len; v++; } @@ -484,9 +479,7 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p, } /* now copy next page if there is one */ if (len && !avail && rqstp->rq_arg.page_len) { - avail = rqstp->rq_arg.page_len; - if (avail > PAGE_SIZE) - avail = PAGE_SIZE; + avail = min_t(unsigned int, rqstp->rq_arg.page_len, PAGE_SIZE); old = page_address(rqstp->rq_arg.pages[0]); } while (len && avail && *old) { @@ -571,10 +564,7 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p, args->verf = p; p += 2; args->dircount = ~0; args->count = ntohl(*p++); - - if (args->count > PAGE_SIZE) - args->count = PAGE_SIZE; - + args->count = min_t(u32, args->count, PAGE_SIZE); args->buffer = page_address(*(rqstp->rq_next_page++)); return xdr_argsize_check(rqstp, p); @@ -595,10 +585,7 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p, args->dircount = ntohl(*p++); args->count = ntohl(*p++); - len = (args->count > max_blocksize) ? 
max_blocksize : - args->count; - args->count = len; - + len = args->count = min(args->count, max_blocksize); while (len > 0) { struct page *p = *(rqstp->rq_next_page++); if (!args->buffer) @@ -913,8 +900,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen, */ /* truncate filename if too long */ - if (namlen > NFS3_MAXNAMLEN) - namlen = NFS3_MAXNAMLEN; + namlen = min(namlen, NFS3_MAXNAMLEN); slen = XDR_QUADLEN(namlen); elen = slen + NFS3_ENTRY_BAGGAGE diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index d714156a19fd..59fd76651781 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -146,35 +146,43 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, int size = 0; pacl = get_acl(inode, ACL_TYPE_ACCESS); - if (!pacl) { + if (!pacl) pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); - if (IS_ERR(pacl)) - return PTR_ERR(pacl); - } + + if (IS_ERR(pacl)) + return PTR_ERR(pacl); + /* allocate for worst case: one (deny, allow) pair each: */ size += 2 * pacl->a_count; if (S_ISDIR(inode->i_mode)) { flags = NFS4_ACL_DIR; dpacl = get_acl(inode, ACL_TYPE_DEFAULT); + if (IS_ERR(dpacl)) { + error = PTR_ERR(dpacl); + goto rel_pacl; + } + if (dpacl) size += 2 * dpacl->a_count; } - *acl = nfs4_acl_new(size); + *acl = kmalloc(nfs4_acl_bytes(size), GFP_KERNEL); if (*acl == NULL) { error = -ENOMEM; goto out; } + (*acl)->naces = 0; _posix_to_nfsv4_one(pacl, *acl, flags & ~NFS4_ACL_TYPE_DEFAULT); if (dpacl) _posix_to_nfsv4_one(dpacl, *acl, flags | NFS4_ACL_TYPE_DEFAULT); - out: - posix_acl_release(pacl); +out: posix_acl_release(dpacl); +rel_pacl: + posix_acl_release(pacl); return error; } @@ -872,16 +880,13 @@ ace2type(struct nfs4_ace *ace) return -1; } -struct nfs4_acl * -nfs4_acl_new(int n) +/* + * return the size of the struct nfs4_acl required to represent an acl + * with @entries entries. 
+ */ +int nfs4_acl_bytes(int entries) { - struct nfs4_acl *acl; - - acl = kmalloc(sizeof(*acl) + n*sizeof(struct nfs4_ace), GFP_KERNEL); - if (acl == NULL) - return NULL; - acl->naces = 0; - return acl; + return sizeof(struct nfs4_acl) + entries * sizeof(struct nfs4_ace); } static struct { @@ -935,5 +940,5 @@ __be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who) return 0; } WARN_ON_ONCE(1); - return -1; + return nfserr_serverfault; } diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 2c73cae9899d..e0be57b0f79b 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -337,7 +337,7 @@ static void encode_cb_recall4args(struct xdr_stream *xdr, p = xdr_reserve_space(xdr, 4); *p++ = xdr_zero; /* truncate */ - encode_nfs_fh4(xdr, &dp->dl_fh); + encode_nfs_fh4(xdr, &dp->dl_stid.sc_file->fi_fhandle); hdr->nops++; } @@ -678,7 +678,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c (clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5)) return -EINVAL; args.client_name = clp->cl_cred.cr_principal; - args.prognumber = conn->cb_prog, + args.prognumber = conn->cb_prog; args.protocol = XPRT_TRANSPORT_TCP; args.authflavor = clp->cl_cred.cr_flavor; clp->cl_cb_ident = conn->cb_ident; @@ -689,7 +689,8 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c clp->cl_cb_session = ses; args.bc_xprt = conn->cb_xprt; args.prognumber = clp->cl_cb_session->se_cb_prog; - args.protocol = XPRT_TRANSPORT_BC_TCP; + args.protocol = conn->cb_xprt->xpt_class->xcl_ident | + XPRT_TRANSPORT_BC; args.authflavor = ses->se_cb_sec.flavor; } /* Create RPC client */ @@ -904,7 +905,7 @@ static void nfsd4_cb_recall_release(void *calldata) spin_lock(&clp->cl_lock); list_del(&cb->cb_per_client); spin_unlock(&clp->cl_lock); - nfs4_put_delegation(dp); + nfs4_put_stid(&dp->dl_stid); } } @@ -933,7 +934,7 @@ void nfsd4_shutdown_callback(struct nfs4_client *clp) set_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags); /* * Note this won't 
actually result in a null callback; - * instead, nfsd4_do_callback_rpc() will detect the killed + * instead, nfsd4_run_cb_null() will detect the killed * client, destroy the rpc client, and stop: */ do_probe_callback(clp); @@ -1011,9 +1012,9 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) run_nfsd4_cb(cb); } -static void nfsd4_do_callback_rpc(struct work_struct *w) +static void +nfsd4_run_callback_rpc(struct nfsd4_callback *cb) { - struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, cb_work); struct nfs4_client *clp = cb->cb_clp; struct rpc_clnt *clnt; @@ -1031,9 +1032,22 @@ static void nfsd4_do_callback_rpc(struct work_struct *w) cb->cb_ops, cb); } -void nfsd4_init_callback(struct nfsd4_callback *cb) +void +nfsd4_run_cb_null(struct work_struct *w) { - INIT_WORK(&cb->cb_work, nfsd4_do_callback_rpc); + struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, + cb_work); + nfsd4_run_callback_rpc(cb); +} + +void +nfsd4_run_cb_recall(struct work_struct *w) +{ + struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, + cb_work); + + nfsd4_prepare_cb_recall(cb->cb_op); + nfsd4_run_callback_rpc(cb); } void nfsd4_cb_recall(struct nfs4_delegation *dp) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 8f029db5d271..5e0dc528a0e8 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -177,7 +177,7 @@ fh_dup2(struct svc_fh *dst, struct svc_fh *src) fh_put(dst); dget(src->fh_dentry); if (src->fh_export) - cache_get(&src->fh_export->h); + exp_get(src->fh_export); *dst = *src; } @@ -385,8 +385,6 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (nfsd4_has_session(cstate)) copy_clientid(&open->op_clientid, cstate->session); - nfs4_lock_state(); - /* check seqid for replay. 
set nfs4_owner */ resp = rqstp->rq_resp; status = nfsd4_process_open1(&resp->cstate, open, nn); @@ -431,8 +429,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, break; case NFS4_OPEN_CLAIM_PREVIOUS: status = nfs4_check_open_reclaim(&open->op_clientid, - cstate->minorversion, - nn); + cstate, nn); if (status) goto out; open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; @@ -461,19 +458,17 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * set, (2) sets open->op_stateid, (3) sets open->op_delegation. */ status = nfsd4_process_open2(rqstp, resfh, open); - WARN_ON(status && open->op_created); + WARN(status && open->op_created, + "nfsd4_process_open2 failed to open newly-created file! status=%u\n", + be32_to_cpu(status)); out: if (resfh && resfh != &cstate->current_fh) { fh_dup2(&cstate->current_fh, resfh); fh_put(resfh); kfree(resfh); } - nfsd4_cleanup_open_state(open, status); - if (open->op_openowner && !nfsd4_has_session(cstate)) - cstate->replay_owner = &open->op_openowner->oo_owner; + nfsd4_cleanup_open_state(cstate, open, status); nfsd4_bump_seqid(cstate, status); - if (!cstate->replay_owner) - nfs4_unlock_state(); return status; } @@ -581,8 +576,12 @@ static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net) __be32 verf[2]; struct nfsd_net *nn = net_generic(net, nfsd_net_id); - verf[0] = (__be32)nn->nfssvc_boot.tv_sec; - verf[1] = (__be32)nn->nfssvc_boot.tv_usec; + /* + * This is opaque to client, so no need to byte-swap. 
Use + * __force to keep sparse happy + */ + verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec; + verf[1] = (__force __be32)nn->nfssvc_boot.tv_usec; memcpy(verifier->data, verf, sizeof(verifier->data)); } @@ -619,8 +618,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, case NF4LNK: status = nfsd_symlink(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, - create->cr_linkname, create->cr_linklen, - &resfh, &create->cr_iattr); + create->cr_data, &resfh); break; case NF4BLK: @@ -909,8 +907,8 @@ nfsd4_secinfo_no_name(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstat default: return nfserr_inval; } - exp_get(cstate->current_fh.fh_export); - sin->sin_exp = cstate->current_fh.fh_export; + + sin->sin_exp = exp_get(cstate->current_fh.fh_export); fh_put(&cstate->current_fh); return nfs_ok; } @@ -1289,7 +1287,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, * Don't use the deferral mechanism for NFSv4; compounds make it * too hard to avoid non-idempotency problems. */ - rqstp->rq_usedeferral = 0; + rqstp->rq_usedeferral = false; /* * According to RFC3010, this takes precedence over all other errors. 
@@ -1391,10 +1389,7 @@ encode_op: args->ops, args->opcnt, resp->opcnt, op->opnum, be32_to_cpu(status)); - if (cstate->replay_owner) { - nfs4_unlock_state(); - cstate->replay_owner = NULL; - } + nfsd4_cstate_clear_replay(cstate); /* XXX Ugh, we need to get rid of this kind of special case: */ if (op->opnum == OP_READ && op->u.read.rd_filp) fput(op->u.read.rd_filp); @@ -1408,7 +1403,7 @@ encode_op: BUG_ON(cstate->replay_owner); out: /* Reset deferral mechanism for RPC deferrals */ - rqstp->rq_usedeferral = 1; + rqstp->rq_usedeferral = true; dprintk("nfsv4 compound returned %d\n", ntohl(status)); return status; } @@ -1520,21 +1515,17 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) u32 maxcount = 0, rlen = 0; maxcount = svc_max_payload(rqstp); - rlen = op->u.read.rd_length; - - if (rlen > maxcount) - rlen = maxcount; + rlen = min(op->u.read.rd_length, maxcount); return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32); } static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { - u32 maxcount = svc_max_payload(rqstp); - u32 rlen = op->u.readdir.rd_maxcount; + u32 maxcount = 0, rlen = 0; - if (rlen > maxcount) - rlen = maxcount; + maxcount = svc_max_payload(rqstp); + rlen = min(op->u.readdir.rd_maxcount, maxcount); return (op_encode_hdr_size + op_encode_verifier_maxsz + XDR_QUADLEN(rlen)) * sizeof(__be32); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 2204e1fe5725..2e80a59e7e91 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -70,13 +70,11 @@ static u64 current_sessionid = 1; #define CURRENT_STATEID(stateid) (!memcmp((stateid), ¤tstateid, sizeof(stateid_t))) /* forward declarations */ -static int check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner); +static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner); +static void nfs4_free_ol_stateid(struct nfs4_stid *stid); /* Locking: */ -/* Currently used for almost all code touching 
nfsv4 state: */ -static DEFINE_MUTEX(client_mutex); - /* * Currently used for the del_recall_lru and file hash table. In an * effort to decrease the scope of the client_mutex, this spinlock may @@ -84,18 +82,18 @@ static DEFINE_MUTEX(client_mutex); */ static DEFINE_SPINLOCK(state_lock); +/* + * A waitqueue for all in-progress 4.0 CLOSE operations that are waiting for + * the refcount on the open stateid to drop. + */ +static DECLARE_WAIT_QUEUE_HEAD(close_wq); + static struct kmem_cache *openowner_slab; static struct kmem_cache *lockowner_slab; static struct kmem_cache *file_slab; static struct kmem_cache *stateid_slab; static struct kmem_cache *deleg_slab; -void -nfs4_lock_state(void) -{ - mutex_lock(&client_mutex); -} - static void free_session(struct nfsd4_session *); static bool is_session_dead(struct nfsd4_session *ses) @@ -103,12 +101,6 @@ static bool is_session_dead(struct nfsd4_session *ses) return ses->se_flags & NFS4_SESSION_DEAD; } -void nfsd4_put_session(struct nfsd4_session *ses) -{ - if (atomic_dec_and_test(&ses->se_ref) && is_session_dead(ses)) - free_session(ses); -} - static __be32 mark_session_dead_locked(struct nfsd4_session *ses, int ref_held_by_me) { if (atomic_read(&ses->se_ref) > ref_held_by_me) @@ -117,46 +109,17 @@ static __be32 mark_session_dead_locked(struct nfsd4_session *ses, int ref_held_b return nfs_ok; } -static __be32 nfsd4_get_session_locked(struct nfsd4_session *ses) -{ - if (is_session_dead(ses)) - return nfserr_badsession; - atomic_inc(&ses->se_ref); - return nfs_ok; -} - -void -nfs4_unlock_state(void) -{ - mutex_unlock(&client_mutex); -} - static bool is_client_expired(struct nfs4_client *clp) { return clp->cl_time == 0; } -static __be32 mark_client_expired_locked(struct nfs4_client *clp) -{ - if (atomic_read(&clp->cl_refcount)) - return nfserr_jukebox; - clp->cl_time = 0; - return nfs_ok; -} - -static __be32 mark_client_expired(struct nfs4_client *clp) +static __be32 get_client_locked(struct nfs4_client *clp) { struct nfsd_net 
*nn = net_generic(clp->net, nfsd_net_id); - __be32 ret; - spin_lock(&nn->client_lock); - ret = mark_client_expired_locked(clp); - spin_unlock(&nn->client_lock); - return ret; -} + lockdep_assert_held(&nn->client_lock); -static __be32 get_client_locked(struct nfs4_client *clp) -{ if (is_client_expired(clp)) return nfserr_expired; atomic_inc(&clp->cl_refcount); @@ -197,13 +160,17 @@ renew_client(struct nfs4_client *clp) static void put_client_renew_locked(struct nfs4_client *clp) { + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + lockdep_assert_held(&nn->client_lock); + if (!atomic_dec_and_test(&clp->cl_refcount)) return; if (!is_client_expired(clp)) renew_client_locked(clp); } -void put_client_renew(struct nfs4_client *clp) +static void put_client_renew(struct nfs4_client *clp) { struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); @@ -214,6 +181,79 @@ void put_client_renew(struct nfs4_client *clp) spin_unlock(&nn->client_lock); } +static __be32 nfsd4_get_session_locked(struct nfsd4_session *ses) +{ + __be32 status; + + if (is_session_dead(ses)) + return nfserr_badsession; + status = get_client_locked(ses->se_client); + if (status) + return status; + atomic_inc(&ses->se_ref); + return nfs_ok; +} + +static void nfsd4_put_session_locked(struct nfsd4_session *ses) +{ + struct nfs4_client *clp = ses->se_client; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + lockdep_assert_held(&nn->client_lock); + + if (atomic_dec_and_test(&ses->se_ref) && is_session_dead(ses)) + free_session(ses); + put_client_renew_locked(clp); +} + +static void nfsd4_put_session(struct nfsd4_session *ses) +{ + struct nfs4_client *clp = ses->se_client; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + spin_lock(&nn->client_lock); + nfsd4_put_session_locked(ses); + spin_unlock(&nn->client_lock); +} + +static int +same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner) +{ + return (sop->so_owner.len == owner->len) && + 0 == 
memcmp(sop->so_owner.data, owner->data, owner->len); +} + +static struct nfs4_openowner * +find_openstateowner_str_locked(unsigned int hashval, struct nfsd4_open *open, + struct nfs4_client *clp) +{ + struct nfs4_stateowner *so; + + lockdep_assert_held(&clp->cl_lock); + + list_for_each_entry(so, &clp->cl_ownerstr_hashtbl[hashval], + so_strhash) { + if (!so->so_is_open_owner) + continue; + if (same_owner_str(so, &open->op_owner)) { + atomic_inc(&so->so_count); + return openowner(so); + } + } + return NULL; +} + +static struct nfs4_openowner * +find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open, + struct nfs4_client *clp) +{ + struct nfs4_openowner *oo; + + spin_lock(&clp->cl_lock); + oo = find_openstateowner_str_locked(hashval, open, clp); + spin_unlock(&clp->cl_lock); + return oo; +} static inline u32 opaque_hashval(const void *ptr, int nbytes) @@ -236,10 +276,11 @@ static void nfsd4_free_file(struct nfs4_file *f) static inline void put_nfs4_file(struct nfs4_file *fi) { + might_lock(&state_lock); + if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) { hlist_del(&fi->fi_hash); spin_unlock(&state_lock); - iput(fi->fi_inode); nfsd4_free_file(fi); } } @@ -250,7 +291,80 @@ get_nfs4_file(struct nfs4_file *fi) atomic_inc(&fi->fi_ref); } -static int num_delegations; +static struct file * +__nfs4_get_fd(struct nfs4_file *f, int oflag) +{ + if (f->fi_fds[oflag]) + return get_file(f->fi_fds[oflag]); + return NULL; +} + +static struct file * +find_writeable_file_locked(struct nfs4_file *f) +{ + struct file *ret; + + lockdep_assert_held(&f->fi_lock); + + ret = __nfs4_get_fd(f, O_WRONLY); + if (!ret) + ret = __nfs4_get_fd(f, O_RDWR); + return ret; +} + +static struct file * +find_writeable_file(struct nfs4_file *f) +{ + struct file *ret; + + spin_lock(&f->fi_lock); + ret = find_writeable_file_locked(f); + spin_unlock(&f->fi_lock); + + return ret; +} + +static struct file *find_readable_file_locked(struct nfs4_file *f) +{ + struct file *ret; + + 
lockdep_assert_held(&f->fi_lock); + + ret = __nfs4_get_fd(f, O_RDONLY); + if (!ret) + ret = __nfs4_get_fd(f, O_RDWR); + return ret; +} + +static struct file * +find_readable_file(struct nfs4_file *f) +{ + struct file *ret; + + spin_lock(&f->fi_lock); + ret = find_readable_file_locked(f); + spin_unlock(&f->fi_lock); + + return ret; +} + +static struct file * +find_any_file(struct nfs4_file *f) +{ + struct file *ret; + + spin_lock(&f->fi_lock); + ret = __nfs4_get_fd(f, O_RDWR); + if (!ret) { + ret = __nfs4_get_fd(f, O_WRONLY); + if (!ret) + ret = __nfs4_get_fd(f, O_RDONLY); + } + spin_unlock(&f->fi_lock); + return ret; +} + +static atomic_long_t num_delegations; unsigned long max_delegations; /* @@ -262,12 +376,11 @@ unsigned long max_delegations; #define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS) #define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1) -static unsigned int ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername) +static unsigned int ownerstr_hashval(struct xdr_netobj *ownername) { unsigned int ret; ret = opaque_hashval(ownername->data, ownername->len); - ret += clientid; return ret & OWNER_HASH_MASK; } @@ -275,75 +388,124 @@ static unsigned int ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername) #define FILE_HASH_BITS 8 #define FILE_HASH_SIZE (1 << FILE_HASH_BITS) -static unsigned int file_hashval(struct inode *ino) +static unsigned int nfsd_fh_hashval(struct knfsd_fh *fh) +{ + return jhash2(fh->fh_base.fh_pad, XDR_QUADLEN(fh->fh_size), 0); +} + +static unsigned int file_hashval(struct knfsd_fh *fh) +{ + return nfsd_fh_hashval(fh) & (FILE_HASH_SIZE - 1); +} + +static bool nfsd_fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) { - /* XXX: why are we hashing on inode pointer, anyway? 
*/ - return hash_ptr(ino, FILE_HASH_BITS); + return fh1->fh_size == fh2->fh_size && + !memcmp(fh1->fh_base.fh_pad, + fh2->fh_base.fh_pad, + fh1->fh_size); } static struct hlist_head file_hashtbl[FILE_HASH_SIZE]; -static void __nfs4_file_get_access(struct nfs4_file *fp, int oflag) +static void +__nfs4_file_get_access(struct nfs4_file *fp, u32 access) { - WARN_ON_ONCE(!(fp->fi_fds[oflag] || fp->fi_fds[O_RDWR])); - atomic_inc(&fp->fi_access[oflag]); + lockdep_assert_held(&fp->fi_lock); + + if (access & NFS4_SHARE_ACCESS_WRITE) + atomic_inc(&fp->fi_access[O_WRONLY]); + if (access & NFS4_SHARE_ACCESS_READ) + atomic_inc(&fp->fi_access[O_RDONLY]); } -static void nfs4_file_get_access(struct nfs4_file *fp, int oflag) +static __be32 +nfs4_file_get_access(struct nfs4_file *fp, u32 access) { - if (oflag == O_RDWR) { - __nfs4_file_get_access(fp, O_RDONLY); - __nfs4_file_get_access(fp, O_WRONLY); - } else - __nfs4_file_get_access(fp, oflag); + lockdep_assert_held(&fp->fi_lock); + + /* Does this access mode make sense? */ + if (access & ~NFS4_SHARE_ACCESS_BOTH) + return nfserr_inval; + + /* Does it conflict with a deny mode already set? */ + if ((access & fp->fi_share_deny) != 0) + return nfserr_share_denied; + + __nfs4_file_get_access(fp, access); + return nfs_ok; } -static void nfs4_file_put_fd(struct nfs4_file *fp, int oflag) +static __be32 nfs4_file_check_deny(struct nfs4_file *fp, u32 deny) { - if (fp->fi_fds[oflag]) { - fput(fp->fi_fds[oflag]); - fp->fi_fds[oflag] = NULL; + /* Common case is that there is no deny mode. */ + if (deny) { + /* Does this deny mode make sense? 
*/ + if (deny & ~NFS4_SHARE_DENY_BOTH) + return nfserr_inval; + + if ((deny & NFS4_SHARE_DENY_READ) && + atomic_read(&fp->fi_access[O_RDONLY])) + return nfserr_share_denied; + + if ((deny & NFS4_SHARE_DENY_WRITE) && + atomic_read(&fp->fi_access[O_WRONLY])) + return nfserr_share_denied; } + return nfs_ok; } static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag) { - if (atomic_dec_and_test(&fp->fi_access[oflag])) { - nfs4_file_put_fd(fp, oflag); + might_lock(&fp->fi_lock); + + if (atomic_dec_and_lock(&fp->fi_access[oflag], &fp->fi_lock)) { + struct file *f1 = NULL; + struct file *f2 = NULL; + + swap(f1, fp->fi_fds[oflag]); if (atomic_read(&fp->fi_access[1 - oflag]) == 0) - nfs4_file_put_fd(fp, O_RDWR); + swap(f2, fp->fi_fds[O_RDWR]); + spin_unlock(&fp->fi_lock); + if (f1) + fput(f1); + if (f2) + fput(f2); } } -static void nfs4_file_put_access(struct nfs4_file *fp, int oflag) +static void nfs4_file_put_access(struct nfs4_file *fp, u32 access) { - if (oflag == O_RDWR) { - __nfs4_file_put_access(fp, O_RDONLY); + WARN_ON_ONCE(access & ~NFS4_SHARE_ACCESS_BOTH); + + if (access & NFS4_SHARE_ACCESS_WRITE) __nfs4_file_put_access(fp, O_WRONLY); - } else - __nfs4_file_put_access(fp, oflag); + if (access & NFS4_SHARE_ACCESS_READ) + __nfs4_file_put_access(fp, O_RDONLY); } -static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct -kmem_cache *slab) +static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, + struct kmem_cache *slab) { - struct idr *stateids = &cl->cl_stateids; struct nfs4_stid *stid; int new_id; - stid = kmem_cache_alloc(slab, GFP_KERNEL); + stid = kmem_cache_zalloc(slab, GFP_KERNEL); if (!stid) return NULL; - new_id = idr_alloc_cyclic(stateids, stid, 0, 0, GFP_KERNEL); + idr_preload(GFP_KERNEL); + spin_lock(&cl->cl_lock); + new_id = idr_alloc_cyclic(&cl->cl_stateids, stid, 0, 0, GFP_NOWAIT); + spin_unlock(&cl->cl_lock); + idr_preload_end(); if (new_id < 0) goto out_free; stid->sc_client = cl; - stid->sc_type = 0; 
stid->sc_stateid.si_opaque.so_id = new_id; stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid; /* Will be incremented before return to client: */ - stid->sc_stateid.si_generation = 0; + atomic_set(&stid->sc_count, 1); /* * It shouldn't be a problem to reuse an opaque stateid value. @@ -360,9 +522,24 @@ out_free: return NULL; } -static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp) +static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp) { - return openlockstateid(nfs4_alloc_stid(clp, stateid_slab)); + struct nfs4_stid *stid; + struct nfs4_ol_stateid *stp; + + stid = nfs4_alloc_stid(clp, stateid_slab); + if (!stid) + return NULL; + + stp = openlockstateid(stid); + stp->st_stid.sc_free = nfs4_free_ol_stateid; + return stp; +} + +static void nfs4_free_deleg(struct nfs4_stid *stid) +{ + kmem_cache_free(deleg_slab, stid); + atomic_long_dec(&num_delegations); } /* @@ -379,10 +556,11 @@ static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp) * Each filter is 256 bits. We hash the filehandle to 32bit and use the * low 3 bytes as hash-table indices. * - * 'state_lock', which is always held when block_delegations() is called, + * 'blocked_delegations_lock', which is always taken in block_delegations(), * is used to manage concurrent access. Testing does not need the lock * except when swapping the two filters. 
*/ +static DEFINE_SPINLOCK(blocked_delegations_lock); static struct bloom_pair { int entries, old_entries; time_t swap_time; @@ -398,7 +576,7 @@ static int delegation_blocked(struct knfsd_fh *fh) if (bd->entries == 0) return 0; if (seconds_since_boot() - bd->swap_time > 30) { - spin_lock(&state_lock); + spin_lock(&blocked_delegations_lock); if (seconds_since_boot() - bd->swap_time > 30) { bd->entries -= bd->old_entries; bd->old_entries = bd->entries; @@ -407,7 +585,7 @@ static int delegation_blocked(struct knfsd_fh *fh) bd->new = 1-bd->new; bd->swap_time = seconds_since_boot(); } - spin_unlock(&state_lock); + spin_unlock(&blocked_delegations_lock); } hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0); if (test_bit(hash&255, bd->set[0]) && @@ -430,69 +608,73 @@ static void block_delegations(struct knfsd_fh *fh) hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0); + spin_lock(&blocked_delegations_lock); __set_bit(hash&255, bd->set[bd->new]); __set_bit((hash>>8)&255, bd->set[bd->new]); __set_bit((hash>>16)&255, bd->set[bd->new]); if (bd->entries == 0) bd->swap_time = seconds_since_boot(); bd->entries += 1; + spin_unlock(&blocked_delegations_lock); } static struct nfs4_delegation * -alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh) +alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh) { struct nfs4_delegation *dp; + long n; dprintk("NFSD alloc_init_deleg\n"); - if (num_delegations > max_delegations) - return NULL; + n = atomic_long_inc_return(&num_delegations); + if (n < 0 || n > max_delegations) + goto out_dec; if (delegation_blocked(¤t_fh->fh_handle)) - return NULL; + goto out_dec; dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab)); if (dp == NULL) - return dp; + goto out_dec; + + dp->dl_stid.sc_free = nfs4_free_deleg; /* * delegation seqid's are never incremented. 
The 4.1 special * meaning of seqid 0 isn't meaningful, really, but let's avoid * 0 anyway just for consistency and use 1: */ dp->dl_stid.sc_stateid.si_generation = 1; - num_delegations++; INIT_LIST_HEAD(&dp->dl_perfile); INIT_LIST_HEAD(&dp->dl_perclnt); INIT_LIST_HEAD(&dp->dl_recall_lru); - dp->dl_file = NULL; dp->dl_type = NFS4_OPEN_DELEGATE_READ; - fh_copy_shallow(&dp->dl_fh, ¤t_fh->fh_handle); - dp->dl_time = 0; - atomic_set(&dp->dl_count, 1); - nfsd4_init_callback(&dp->dl_recall); + INIT_WORK(&dp->dl_recall.cb_work, nfsd4_run_cb_recall); return dp; +out_dec: + atomic_long_dec(&num_delegations); + return NULL; } -static void remove_stid(struct nfs4_stid *s) +void +nfs4_put_stid(struct nfs4_stid *s) { - struct idr *stateids = &s->sc_client->cl_stateids; + struct nfs4_file *fp = s->sc_file; + struct nfs4_client *clp = s->sc_client; - idr_remove(stateids, s->sc_stateid.si_opaque.so_id); -} + might_lock(&clp->cl_lock); -static void nfs4_free_stid(struct kmem_cache *slab, struct nfs4_stid *s) -{ - kmem_cache_free(slab, s); -} - -void -nfs4_put_delegation(struct nfs4_delegation *dp) -{ - if (atomic_dec_and_test(&dp->dl_count)) { - nfs4_free_stid(deleg_slab, &dp->dl_stid); - num_delegations--; + if (!atomic_dec_and_lock(&s->sc_count, &clp->cl_lock)) { + wake_up_all(&close_wq); + return; } + idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id); + spin_unlock(&clp->cl_lock); + s->sc_free(s); + if (fp) + put_nfs4_file(fp); } static void nfs4_put_deleg_lease(struct nfs4_file *fp) { + lockdep_assert_held(&state_lock); + if (!fp->fi_lease) return; if (atomic_dec_and_test(&fp->fi_delegees)) { @@ -512,54 +694,54 @@ static void hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) { lockdep_assert_held(&state_lock); + lockdep_assert_held(&fp->fi_lock); + atomic_inc(&dp->dl_stid.sc_count); dp->dl_stid.sc_type = NFS4_DELEG_STID; list_add(&dp->dl_perfile, &fp->fi_delegations); list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); } -/* 
Called under the state lock. */ static void -unhash_delegation(struct nfs4_delegation *dp) +unhash_delegation_locked(struct nfs4_delegation *dp) { - spin_lock(&state_lock); - list_del_init(&dp->dl_perclnt); - list_del_init(&dp->dl_perfile); - list_del_init(&dp->dl_recall_lru); - spin_unlock(&state_lock); - if (dp->dl_file) { - nfs4_put_deleg_lease(dp->dl_file); - put_nfs4_file(dp->dl_file); - dp->dl_file = NULL; - } -} - + struct nfs4_file *fp = dp->dl_stid.sc_file; + lockdep_assert_held(&state_lock); -static void destroy_revoked_delegation(struct nfs4_delegation *dp) -{ + dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID; + /* Ensure that deleg break won't try to requeue it */ + ++dp->dl_time; + spin_lock(&fp->fi_lock); + list_del_init(&dp->dl_perclnt); list_del_init(&dp->dl_recall_lru); - remove_stid(&dp->dl_stid); - nfs4_put_delegation(dp); + list_del_init(&dp->dl_perfile); + spin_unlock(&fp->fi_lock); + if (fp) + nfs4_put_deleg_lease(fp); } static void destroy_delegation(struct nfs4_delegation *dp) { - unhash_delegation(dp); - remove_stid(&dp->dl_stid); - nfs4_put_delegation(dp); + spin_lock(&state_lock); + unhash_delegation_locked(dp); + spin_unlock(&state_lock); + nfs4_put_stid(&dp->dl_stid); } static void revoke_delegation(struct nfs4_delegation *dp) { struct nfs4_client *clp = dp->dl_stid.sc_client; + WARN_ON(!list_empty(&dp->dl_recall_lru)); + if (clp->cl_minorversion == 0) - destroy_delegation(dp); + nfs4_put_stid(&dp->dl_stid); else { - unhash_delegation(dp); dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID; + spin_lock(&clp->cl_lock); list_add(&dp->dl_recall_lru, &clp->cl_revoked); + spin_unlock(&clp->cl_lock); } } @@ -607,57 +789,62 @@ bmap_to_share_mode(unsigned long bmap) { return access; } -static bool -test_share(struct nfs4_ol_stateid *stp, struct nfsd4_open *open) { - unsigned int access, deny; - - access = bmap_to_share_mode(stp->st_access_bmap); - deny = bmap_to_share_mode(stp->st_deny_bmap); - if ((access & open->op_share_deny) || (deny & 
open->op_share_access)) - return false; - return true; -} - /* set share access for a given stateid */ static inline void set_access(u32 access, struct nfs4_ol_stateid *stp) { - __set_bit(access, &stp->st_access_bmap); + unsigned char mask = 1 << access; + + WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH); + stp->st_access_bmap |= mask; } /* clear share access for a given stateid */ static inline void clear_access(u32 access, struct nfs4_ol_stateid *stp) { - __clear_bit(access, &stp->st_access_bmap); + unsigned char mask = 1 << access; + + WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH); + stp->st_access_bmap &= ~mask; } /* test whether a given stateid has access */ static inline bool test_access(u32 access, struct nfs4_ol_stateid *stp) { - return test_bit(access, &stp->st_access_bmap); + unsigned char mask = 1 << access; + + return (bool)(stp->st_access_bmap & mask); } /* set share deny for a given stateid */ static inline void -set_deny(u32 access, struct nfs4_ol_stateid *stp) +set_deny(u32 deny, struct nfs4_ol_stateid *stp) { - __set_bit(access, &stp->st_deny_bmap); + unsigned char mask = 1 << deny; + + WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH); + stp->st_deny_bmap |= mask; } /* clear share deny for a given stateid */ static inline void -clear_deny(u32 access, struct nfs4_ol_stateid *stp) +clear_deny(u32 deny, struct nfs4_ol_stateid *stp) { - __clear_bit(access, &stp->st_deny_bmap); + unsigned char mask = 1 << deny; + + WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH); + stp->st_deny_bmap &= ~mask; } /* test whether a given stateid is denying specific access */ static inline bool -test_deny(u32 access, struct nfs4_ol_stateid *stp) +test_deny(u32 deny, struct nfs4_ol_stateid *stp) { - return test_bit(access, &stp->st_deny_bmap); + unsigned char mask = 1 << deny; + + return (bool)(stp->st_deny_bmap & mask); } static int nfs4_access_to_omode(u32 access) @@ -674,138 +861,283 @@ static int nfs4_access_to_omode(u32 access) return O_RDONLY; } +/* + * A stateid that had a deny 
mode associated with it is being released + * or downgraded. Recalculate the deny mode on the file. + */ +static void +recalculate_deny_mode(struct nfs4_file *fp) +{ + struct nfs4_ol_stateid *stp; + + spin_lock(&fp->fi_lock); + fp->fi_share_deny = 0; + list_for_each_entry(stp, &fp->fi_stateids, st_perfile) + fp->fi_share_deny |= bmap_to_share_mode(stp->st_deny_bmap); + spin_unlock(&fp->fi_lock); +} + +static void +reset_union_bmap_deny(u32 deny, struct nfs4_ol_stateid *stp) +{ + int i; + bool change = false; + + for (i = 1; i < 4; i++) { + if ((i & deny) != i) { + change = true; + clear_deny(i, stp); + } + } + + /* Recalculate per-file deny mode if there was a change */ + if (change) + recalculate_deny_mode(stp->st_stid.sc_file); +} + /* release all access and file references for a given stateid */ static void release_all_access(struct nfs4_ol_stateid *stp) { int i; + struct nfs4_file *fp = stp->st_stid.sc_file; + + if (fp && stp->st_deny_bmap != 0) + recalculate_deny_mode(fp); for (i = 1; i < 4; i++) { if (test_access(i, stp)) - nfs4_file_put_access(stp->st_file, - nfs4_access_to_omode(i)); + nfs4_file_put_access(stp->st_stid.sc_file, i); clear_access(i, stp); } } -static void unhash_generic_stateid(struct nfs4_ol_stateid *stp) +static void nfs4_put_stateowner(struct nfs4_stateowner *sop) { + struct nfs4_client *clp = sop->so_client; + + might_lock(&clp->cl_lock); + + if (!atomic_dec_and_lock(&sop->so_count, &clp->cl_lock)) + return; + sop->so_ops->so_unhash(sop); + spin_unlock(&clp->cl_lock); + kfree(sop->so_owner.data); + sop->so_ops->so_free(sop); +} + +static void unhash_ol_stateid(struct nfs4_ol_stateid *stp) +{ + struct nfs4_file *fp = stp->st_stid.sc_file; + + lockdep_assert_held(&stp->st_stateowner->so_client->cl_lock); + + spin_lock(&fp->fi_lock); list_del(&stp->st_perfile); + spin_unlock(&fp->fi_lock); list_del(&stp->st_perstateowner); } -static void close_generic_stateid(struct nfs4_ol_stateid *stp) +static void nfs4_free_ol_stateid(struct nfs4_stid 
*stid) { + struct nfs4_ol_stateid *stp = openlockstateid(stid); + release_all_access(stp); - put_nfs4_file(stp->st_file); - stp->st_file = NULL; + if (stp->st_stateowner) + nfs4_put_stateowner(stp->st_stateowner); + kmem_cache_free(stateid_slab, stid); } -static void free_generic_stateid(struct nfs4_ol_stateid *stp) +static void nfs4_free_lock_stateid(struct nfs4_stid *stid) { - remove_stid(&stp->st_stid); - nfs4_free_stid(stateid_slab, &stp->st_stid); + struct nfs4_ol_stateid *stp = openlockstateid(stid); + struct nfs4_lockowner *lo = lockowner(stp->st_stateowner); + struct file *file; + + file = find_any_file(stp->st_stid.sc_file); + if (file) + filp_close(file, (fl_owner_t)lo); + nfs4_free_ol_stateid(stid); } -static void release_lock_stateid(struct nfs4_ol_stateid *stp) +/* + * Put the persistent reference to an already unhashed generic stateid, while + * holding the cl_lock. If it's the last reference, then put it onto the + * reaplist for later destruction. + */ +static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp, + struct list_head *reaplist) { - struct file *file; + struct nfs4_stid *s = &stp->st_stid; + struct nfs4_client *clp = s->sc_client; + + lockdep_assert_held(&clp->cl_lock); - unhash_generic_stateid(stp); + WARN_ON_ONCE(!list_empty(&stp->st_locks)); + + if (!atomic_dec_and_test(&s->sc_count)) { + wake_up_all(&close_wq); + return; + } + + idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id); + list_add(&stp->st_locks, reaplist); +} + +static void unhash_lock_stateid(struct nfs4_ol_stateid *stp) +{ + struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner); + + lockdep_assert_held(&oo->oo_owner.so_client->cl_lock); + + list_del_init(&stp->st_locks); + unhash_ol_stateid(stp); unhash_stid(&stp->st_stid); - file = find_any_file(stp->st_file); - if (file) - locks_remove_posix(file, (fl_owner_t)lockowner(stp->st_stateowner)); - close_generic_stateid(stp); - free_generic_stateid(stp); } -static void unhash_lockowner(struct 
nfs4_lockowner *lo) +static void release_lock_stateid(struct nfs4_ol_stateid *stp) { - struct nfs4_ol_stateid *stp; + struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner); - list_del(&lo->lo_owner.so_strhash); - list_del(&lo->lo_perstateid); - list_del(&lo->lo_owner_ino_hash); - while (!list_empty(&lo->lo_owner.so_stateids)) { - stp = list_first_entry(&lo->lo_owner.so_stateids, - struct nfs4_ol_stateid, st_perstateowner); - release_lock_stateid(stp); - } + spin_lock(&oo->oo_owner.so_client->cl_lock); + unhash_lock_stateid(stp); + spin_unlock(&oo->oo_owner.so_client->cl_lock); + nfs4_put_stid(&stp->st_stid); } -static void nfs4_free_lockowner(struct nfs4_lockowner *lo) +static void unhash_lockowner_locked(struct nfs4_lockowner *lo) { - kfree(lo->lo_owner.so_owner.data); - kmem_cache_free(lockowner_slab, lo); + struct nfs4_client *clp = lo->lo_owner.so_client; + + lockdep_assert_held(&clp->cl_lock); + + list_del_init(&lo->lo_owner.so_strhash); +} + +/* + * Free a list of generic stateids that were collected earlier after being + * fully unhashed. 
+ */ +static void +free_ol_stateid_reaplist(struct list_head *reaplist) +{ + struct nfs4_ol_stateid *stp; + struct nfs4_file *fp; + + might_sleep(); + + while (!list_empty(reaplist)) { + stp = list_first_entry(reaplist, struct nfs4_ol_stateid, + st_locks); + list_del(&stp->st_locks); + fp = stp->st_stid.sc_file; + stp->st_stid.sc_free(&stp->st_stid); + if (fp) + put_nfs4_file(fp); + } } static void release_lockowner(struct nfs4_lockowner *lo) { - unhash_lockowner(lo); - nfs4_free_lockowner(lo); + struct nfs4_client *clp = lo->lo_owner.so_client; + struct nfs4_ol_stateid *stp; + struct list_head reaplist; + + INIT_LIST_HEAD(&reaplist); + + spin_lock(&clp->cl_lock); + unhash_lockowner_locked(lo); + while (!list_empty(&lo->lo_owner.so_stateids)) { + stp = list_first_entry(&lo->lo_owner.so_stateids, + struct nfs4_ol_stateid, st_perstateowner); + unhash_lock_stateid(stp); + put_ol_stateid_locked(stp, &reaplist); + } + spin_unlock(&clp->cl_lock); + free_ol_stateid_reaplist(&reaplist); + nfs4_put_stateowner(&lo->lo_owner); } -static void -release_stateid_lockowners(struct nfs4_ol_stateid *open_stp) +static void release_open_stateid_locks(struct nfs4_ol_stateid *open_stp, + struct list_head *reaplist) { - struct nfs4_lockowner *lo; + struct nfs4_ol_stateid *stp; - while (!list_empty(&open_stp->st_lockowners)) { - lo = list_entry(open_stp->st_lockowners.next, - struct nfs4_lockowner, lo_perstateid); - release_lockowner(lo); + while (!list_empty(&open_stp->st_locks)) { + stp = list_entry(open_stp->st_locks.next, + struct nfs4_ol_stateid, st_locks); + unhash_lock_stateid(stp); + put_ol_stateid_locked(stp, reaplist); } } -static void unhash_open_stateid(struct nfs4_ol_stateid *stp) +static void unhash_open_stateid(struct nfs4_ol_stateid *stp, + struct list_head *reaplist) { - unhash_generic_stateid(stp); - release_stateid_lockowners(stp); - close_generic_stateid(stp); + lockdep_assert_held(&stp->st_stid.sc_client->cl_lock); + + unhash_ol_stateid(stp); + 
release_open_stateid_locks(stp, reaplist); } static void release_open_stateid(struct nfs4_ol_stateid *stp) { - unhash_open_stateid(stp); - free_generic_stateid(stp); + LIST_HEAD(reaplist); + + spin_lock(&stp->st_stid.sc_client->cl_lock); + unhash_open_stateid(stp, &reaplist); + put_ol_stateid_locked(stp, &reaplist); + spin_unlock(&stp->st_stid.sc_client->cl_lock); + free_ol_stateid_reaplist(&reaplist); } -static void unhash_openowner(struct nfs4_openowner *oo) +static void unhash_openowner_locked(struct nfs4_openowner *oo) { - struct nfs4_ol_stateid *stp; + struct nfs4_client *clp = oo->oo_owner.so_client; - list_del(&oo->oo_owner.so_strhash); - list_del(&oo->oo_perclient); - while (!list_empty(&oo->oo_owner.so_stateids)) { - stp = list_first_entry(&oo->oo_owner.so_stateids, - struct nfs4_ol_stateid, st_perstateowner); - release_open_stateid(stp); - } + lockdep_assert_held(&clp->cl_lock); + + list_del_init(&oo->oo_owner.so_strhash); + list_del_init(&oo->oo_perclient); } static void release_last_closed_stateid(struct nfs4_openowner *oo) { - struct nfs4_ol_stateid *s = oo->oo_last_closed_stid; + struct nfsd_net *nn = net_generic(oo->oo_owner.so_client->net, + nfsd_net_id); + struct nfs4_ol_stateid *s; + spin_lock(&nn->client_lock); + s = oo->oo_last_closed_stid; if (s) { - free_generic_stateid(s); + list_del_init(&oo->oo_close_lru); oo->oo_last_closed_stid = NULL; } -} - -static void nfs4_free_openowner(struct nfs4_openowner *oo) -{ - kfree(oo->oo_owner.so_owner.data); - kmem_cache_free(openowner_slab, oo); + spin_unlock(&nn->client_lock); + if (s) + nfs4_put_stid(&s->st_stid); } static void release_openowner(struct nfs4_openowner *oo) { - unhash_openowner(oo); - list_del(&oo->oo_close_lru); + struct nfs4_ol_stateid *stp; + struct nfs4_client *clp = oo->oo_owner.so_client; + struct list_head reaplist; + + INIT_LIST_HEAD(&reaplist); + + spin_lock(&clp->cl_lock); + unhash_openowner_locked(oo); + while (!list_empty(&oo->oo_owner.so_stateids)) { + stp = 
list_first_entry(&oo->oo_owner.so_stateids, + struct nfs4_ol_stateid, st_perstateowner); + unhash_open_stateid(stp, &reaplist); + put_ol_stateid_locked(stp, &reaplist); + } + spin_unlock(&clp->cl_lock); + free_ol_stateid_reaplist(&reaplist); release_last_closed_stateid(oo); - nfs4_free_openowner(oo); + nfs4_put_stateowner(&oo->oo_owner); } static inline int @@ -842,7 +1174,7 @@ void nfsd4_bump_seqid(struct nfsd4_compound_state *cstate, __be32 nfserr) return; if (!seqid_mutating_err(ntohl(nfserr))) { - cstate->replay_owner = NULL; + nfsd4_cstate_clear_replay(cstate); return; } if (!so) @@ -1030,10 +1362,8 @@ static void nfsd4_init_conn(struct svc_rqst *rqstp, struct nfsd4_conn *conn, str if (ret) /* oops; xprt is already down: */ nfsd4_conn_lost(&conn->cn_xpt_user); - if (conn->cn_flags & NFS4_CDFC4_BACK) { - /* callback channel may be back up */ - nfsd4_probe_callback(ses->se_client); - } + /* We may have gained or lost a callback channel: */ + nfsd4_probe_callback_sync(ses->se_client); } static struct nfsd4_conn *alloc_conn_from_crses(struct svc_rqst *rqstp, struct nfsd4_create_session *cses) @@ -1073,9 +1403,6 @@ static void __free_session(struct nfsd4_session *ses) static void free_session(struct nfsd4_session *ses) { - struct nfsd_net *nn = net_generic(ses->se_client->net, nfsd_net_id); - - lockdep_assert_held(&nn->client_lock); nfsd4_del_conns(ses); nfsd4_put_drc_mem(&ses->se_fchannel); __free_session(ses); @@ -1097,12 +1424,10 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru new->se_cb_sec = cses->cb_sec; atomic_set(&new->se_ref, 0); idx = hash_sessionid(&new->se_sessionid); - spin_lock(&nn->client_lock); list_add(&new->se_hash, &nn->sessionid_hashtbl[idx]); spin_lock(&clp->cl_lock); list_add(&new->se_perclnt, &clp->cl_sessions); spin_unlock(&clp->cl_lock); - spin_unlock(&nn->client_lock); if (cses->flags & SESSION4_BACK_CHAN) { struct sockaddr *sa = svc_addr(rqstp); @@ -1120,12 +1445,14 @@ static void init_session(struct 
svc_rqst *rqstp, struct nfsd4_session *new, stru /* caller must hold client_lock */ static struct nfsd4_session * -find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net) +__find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net) { struct nfsd4_session *elem; int idx; struct nfsd_net *nn = net_generic(net, nfsd_net_id); + lockdep_assert_held(&nn->client_lock); + dump_sessionid(__func__, sessionid); idx = hash_sessionid(sessionid); /* Search in the appropriate list */ @@ -1140,10 +1467,33 @@ find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net) return NULL; } +static struct nfsd4_session * +find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net, + __be32 *ret) +{ + struct nfsd4_session *session; + __be32 status = nfserr_badsession; + + session = __find_in_sessionid_hashtbl(sessionid, net); + if (!session) + goto out; + status = nfsd4_get_session_locked(session); + if (status) + session = NULL; +out: + *ret = status; + return session; +} + /* caller must hold client_lock */ static void unhash_session(struct nfsd4_session *ses) { + struct nfs4_client *clp = ses->se_client; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + lockdep_assert_held(&nn->client_lock); + list_del(&ses->se_hash); spin_lock(&ses->se_client->cl_lock); list_del(&ses->se_perclnt); @@ -1169,15 +1519,20 @@ STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn) static struct nfs4_client *alloc_client(struct xdr_netobj name) { struct nfs4_client *clp; + int i; clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL); if (clp == NULL) return NULL; clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL); - if (clp->cl_name.data == NULL) { - kfree(clp); - return NULL; - } + if (clp->cl_name.data == NULL) + goto err_no_name; + clp->cl_ownerstr_hashtbl = kmalloc(sizeof(struct list_head) * + OWNER_HASH_SIZE, GFP_KERNEL); + if (!clp->cl_ownerstr_hashtbl) + goto err_no_hashtbl; + for (i = 0; i < 
OWNER_HASH_SIZE; i++) + INIT_LIST_HEAD(&clp->cl_ownerstr_hashtbl[i]); clp->cl_name.len = name.len; INIT_LIST_HEAD(&clp->cl_sessions); idr_init(&clp->cl_stateids); @@ -1192,14 +1547,16 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) spin_lock_init(&clp->cl_lock); rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); return clp; +err_no_hashtbl: + kfree(clp->cl_name.data); +err_no_name: + kfree(clp); + return NULL; } static void free_client(struct nfs4_client *clp) { - struct nfsd_net __maybe_unused *nn = net_generic(clp->net, nfsd_net_id); - - lockdep_assert_held(&nn->client_lock); while (!list_empty(&clp->cl_sessions)) { struct nfsd4_session *ses; ses = list_entry(clp->cl_sessions.next, struct nfsd4_session, @@ -1210,18 +1567,32 @@ free_client(struct nfs4_client *clp) } rpc_destroy_wait_queue(&clp->cl_cb_waitq); free_svc_cred(&clp->cl_cred); + kfree(clp->cl_ownerstr_hashtbl); kfree(clp->cl_name.data); idr_destroy(&clp->cl_stateids); kfree(clp); } /* must be called under the client_lock */ -static inline void +static void unhash_client_locked(struct nfs4_client *clp) { + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); struct nfsd4_session *ses; - list_del(&clp->cl_lru); + lockdep_assert_held(&nn->client_lock); + + /* Mark the client as expired! 
*/ + clp->cl_time = 0; + /* Make it invisible */ + if (!list_empty(&clp->cl_idhash)) { + list_del_init(&clp->cl_idhash); + if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags)) + rb_erase(&clp->cl_namenode, &nn->conf_name_tree); + else + rb_erase(&clp->cl_namenode, &nn->unconf_name_tree); + } + list_del_init(&clp->cl_lru); spin_lock(&clp->cl_lock); list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) list_del_init(&ses->se_hash); @@ -1229,53 +1600,71 @@ unhash_client_locked(struct nfs4_client *clp) } static void -destroy_client(struct nfs4_client *clp) +unhash_client(struct nfs4_client *clp) +{ + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + spin_lock(&nn->client_lock); + unhash_client_locked(clp); + spin_unlock(&nn->client_lock); +} + +static __be32 mark_client_expired_locked(struct nfs4_client *clp) +{ + if (atomic_read(&clp->cl_refcount)) + return nfserr_jukebox; + unhash_client_locked(clp); + return nfs_ok; +} + +static void +__destroy_client(struct nfs4_client *clp) { struct nfs4_openowner *oo; struct nfs4_delegation *dp; struct list_head reaplist; - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); INIT_LIST_HEAD(&reaplist); spin_lock(&state_lock); while (!list_empty(&clp->cl_delegations)) { dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt); - list_del_init(&dp->dl_perclnt); - list_move(&dp->dl_recall_lru, &reaplist); + unhash_delegation_locked(dp); + list_add(&dp->dl_recall_lru, &reaplist); } spin_unlock(&state_lock); while (!list_empty(&reaplist)) { dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); - destroy_delegation(dp); + list_del_init(&dp->dl_recall_lru); + nfs4_put_stid(&dp->dl_stid); } - list_splice_init(&clp->cl_revoked, &reaplist); - while (!list_empty(&reaplist)) { + while (!list_empty(&clp->cl_revoked)) { dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); - destroy_revoked_delegation(dp); + list_del_init(&dp->dl_recall_lru); + 
nfs4_put_stid(&dp->dl_stid); } while (!list_empty(&clp->cl_openowners)) { oo = list_entry(clp->cl_openowners.next, struct nfs4_openowner, oo_perclient); + atomic_inc(&oo->oo_owner.so_count); release_openowner(oo); } nfsd4_shutdown_callback(clp); if (clp->cl_cb_conn.cb_xprt) svc_xprt_put(clp->cl_cb_conn.cb_xprt); - list_del(&clp->cl_idhash); - if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags)) - rb_erase(&clp->cl_namenode, &nn->conf_name_tree); - else - rb_erase(&clp->cl_namenode, &nn->unconf_name_tree); - spin_lock(&nn->client_lock); - unhash_client_locked(clp); - WARN_ON_ONCE(atomic_read(&clp->cl_refcount)); free_client(clp); - spin_unlock(&nn->client_lock); +} + +static void +destroy_client(struct nfs4_client *clp) +{ + unhash_client(clp); + __destroy_client(clp); } static void expire_client(struct nfs4_client *clp) { + unhash_client(clp); nfsd4_client_record_remove(clp); - destroy_client(clp); + __destroy_client(clp); } static void copy_verf(struct nfs4_client *target, nfs4_verifier *source) @@ -1408,25 +1797,28 @@ static bool mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp) return 0 == strcmp(cl->cl_cred.cr_principal, cr->cr_principal); } -static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn) +static void gen_confirm(struct nfs4_client *clp, struct nfsd_net *nn) { - static u32 current_clientid = 1; + __be32 verf[2]; - clp->cl_clientid.cl_boot = nn->boot_time; - clp->cl_clientid.cl_id = current_clientid++; + /* + * This is opaque to client, so no need to byte-swap. 
Use + * __force to keep sparse happy + */ + verf[0] = (__force __be32)get_seconds(); + verf[1] = (__force __be32)nn->clientid_counter; + memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data)); } -static void gen_confirm(struct nfs4_client *clp) +static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn) { - __be32 verf[2]; - static u32 i; - - verf[0] = (__be32)get_seconds(); - verf[1] = (__be32)i++; - memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data)); + clp->cl_clientid.cl_boot = nn->boot_time; + clp->cl_clientid.cl_id = nn->clientid_counter++; + gen_confirm(clp, nn); } -static struct nfs4_stid *find_stateid(struct nfs4_client *cl, stateid_t *t) +static struct nfs4_stid * +find_stateid_locked(struct nfs4_client *cl, stateid_t *t) { struct nfs4_stid *ret; @@ -1436,16 +1828,21 @@ static struct nfs4_stid *find_stateid(struct nfs4_client *cl, stateid_t *t) return ret; } -static struct nfs4_stid *find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask) +static struct nfs4_stid * +find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask) { struct nfs4_stid *s; - s = find_stateid(cl, t); - if (!s) - return NULL; - if (typemask & s->sc_type) - return s; - return NULL; + spin_lock(&cl->cl_lock); + s = find_stateid_locked(cl, t); + if (s != NULL) { + if (typemask & s->sc_type) + atomic_inc(&s->sc_count); + else + s = NULL; + } + spin_unlock(&cl->cl_lock); + return s; } static struct nfs4_client *create_client(struct xdr_netobj name, @@ -1455,7 +1852,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name, struct sockaddr *sa = svc_addr(rqstp); int ret; struct net *net = SVC_NET(rqstp); - struct nfsd_net *nn = net_generic(net, nfsd_net_id); clp = alloc_client(name); if (clp == NULL) @@ -1463,17 +1859,14 @@ static struct nfs4_client *create_client(struct xdr_netobj name, ret = copy_cred(&clp->cl_cred, &rqstp->rq_cred); if (ret) { - spin_lock(&nn->client_lock); free_client(clp); - 
spin_unlock(&nn->client_lock); return NULL; } - nfsd4_init_callback(&clp->cl_cb_null); + INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_run_cb_null); clp->cl_time = get_seconds(); clear_bit(0, &clp->cl_cb_slot_busy); copy_verf(clp, verf); rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa); - gen_confirm(clp); clp->cl_cb_session = NULL; clp->net = net; return clp; @@ -1525,11 +1918,13 @@ add_to_unconfirmed(struct nfs4_client *clp) unsigned int idhashval; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + lockdep_assert_held(&nn->client_lock); + clear_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags); add_clp_to_name_tree(clp, &nn->unconf_name_tree); idhashval = clientid_hashval(clp->cl_clientid.cl_id); list_add(&clp->cl_idhash, &nn->unconf_id_hashtbl[idhashval]); - renew_client(clp); + renew_client_locked(clp); } static void @@ -1538,12 +1933,14 @@ move_to_confirmed(struct nfs4_client *clp) unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id); struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + lockdep_assert_held(&nn->client_lock); + dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp); list_move(&clp->cl_idhash, &nn->conf_id_hashtbl[idhashval]); rb_erase(&clp->cl_namenode, &nn->unconf_name_tree); add_clp_to_name_tree(clp, &nn->conf_name_tree); set_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags); - renew_client(clp); + renew_client_locked(clp); } static struct nfs4_client * @@ -1556,7 +1953,7 @@ find_client_in_id_table(struct list_head *tbl, clientid_t *clid, bool sessions) if (same_clid(&clp->cl_clientid, clid)) { if ((bool)clp->cl_minorversion != sessions) return NULL; - renew_client(clp); + renew_client_locked(clp); return clp; } } @@ -1568,6 +1965,7 @@ find_confirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn) { struct list_head *tbl = nn->conf_id_hashtbl; + lockdep_assert_held(&nn->client_lock); return find_client_in_id_table(tbl, clid, sessions); } @@ -1576,6 +1974,7 @@ find_unconfirmed_client(clientid_t *clid, bool 
sessions, struct nfsd_net *nn) { struct list_head *tbl = nn->unconf_id_hashtbl; + lockdep_assert_held(&nn->client_lock); return find_client_in_id_table(tbl, clid, sessions); } @@ -1587,12 +1986,14 @@ static bool clp_used_exchangeid(struct nfs4_client *clp) static struct nfs4_client * find_confirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn) { + lockdep_assert_held(&nn->client_lock); return find_clp_in_name_tree(name, &nn->conf_name_tree); } static struct nfs4_client * find_unconfirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn) { + lockdep_assert_held(&nn->client_lock); return find_clp_in_name_tree(name, &nn->unconf_name_tree); } @@ -1642,7 +2043,7 @@ out_err: /* * Cache a reply. nfsd4_check_resp_size() has bounded the cache size. */ -void +static void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) { struct xdr_buf *buf = resp->xdr.buf; @@ -1758,7 +2159,8 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_exchange_id *exid) { - struct nfs4_client *unconf, *conf, *new; + struct nfs4_client *conf, *new; + struct nfs4_client *unconf = NULL; __be32 status; char addr_str[INET6_ADDRSTRLEN]; nfs4_verifier verf = exid->verifier; @@ -1787,8 +2189,12 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, return nfserr_encr_alg_unsupp; } + new = create_client(exid->clname, rqstp, &verf); + if (new == NULL) + return nfserr_jukebox; + /* Cases below refer to rfc 5661 section 18.35.4: */ - nfs4_lock_state(); + spin_lock(&nn->client_lock); conf = find_confirmed_client_by_name(&exid->clname, nn); if (conf) { bool creds_match = same_creds(&conf->cl_cred, &rqstp->rq_cred); @@ -1813,7 +2219,6 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, } /* case 6 */ exid->flags |= EXCHGID4_FLAG_CONFIRMED_R; - new = conf; goto out_copy; } if (!creds_match) { /* case 3 */ @@ -1821,15 +2226,14 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, status = nfserr_clid_inuse; goto out; } - expire_client(conf); goto out_new; } if 
(verfs_match) { /* case 2 */ conf->cl_exchange_flags |= EXCHGID4_FLAG_CONFIRMED_R; - new = conf; goto out_copy; } /* case 5, client reboot */ + conf = NULL; goto out_new; } @@ -1840,33 +2244,38 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, unconf = find_unconfirmed_client_by_name(&exid->clname, nn); if (unconf) /* case 4, possible retry or client restart */ - expire_client(unconf); + unhash_client_locked(unconf); /* case 1 (normal case) */ out_new: - new = create_client(exid->clname, rqstp, &verf); - if (new == NULL) { - status = nfserr_jukebox; - goto out; + if (conf) { + status = mark_client_expired_locked(conf); + if (status) + goto out; } new->cl_minorversion = cstate->minorversion; new->cl_mach_cred = (exid->spa_how == SP4_MACH_CRED); gen_clid(new, nn); add_to_unconfirmed(new); + swap(new, conf); out_copy: - exid->clientid.cl_boot = new->cl_clientid.cl_boot; - exid->clientid.cl_id = new->cl_clientid.cl_id; + exid->clientid.cl_boot = conf->cl_clientid.cl_boot; + exid->clientid.cl_id = conf->cl_clientid.cl_id; - exid->seqid = new->cl_cs_slot.sl_seqid + 1; - nfsd4_set_ex_flags(new, exid); + exid->seqid = conf->cl_cs_slot.sl_seqid + 1; + nfsd4_set_ex_flags(conf, exid); dprintk("nfsd4_exchange_id seqid %d flags %x\n", - new->cl_cs_slot.sl_seqid, new->cl_exchange_flags); + conf->cl_cs_slot.sl_seqid, conf->cl_exchange_flags); status = nfs_ok; out: - nfs4_unlock_state(); + spin_unlock(&nn->client_lock); + if (new) + expire_client(new); + if (unconf) + expire_client(unconf); return status; } @@ -2010,6 +2419,7 @@ nfsd4_create_session(struct svc_rqst *rqstp, { struct sockaddr *sa = svc_addr(rqstp); struct nfs4_client *conf, *unconf; + struct nfs4_client *old = NULL; struct nfsd4_session *new; struct nfsd4_conn *conn; struct nfsd4_clid_slot *cs_slot = NULL; @@ -2035,7 +2445,7 @@ nfsd4_create_session(struct svc_rqst *rqstp, if (!conn) goto out_free_session; - nfs4_lock_state(); + spin_lock(&nn->client_lock); unconf = find_unconfirmed_client(&cr_ses->clientid, true, nn); 
conf = find_confirmed_client(&cr_ses->clientid, true, nn); WARN_ON_ONCE(conf && unconf); @@ -2054,7 +2464,6 @@ nfsd4_create_session(struct svc_rqst *rqstp, goto out_free_conn; } } else if (unconf) { - struct nfs4_client *old; if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) { status = nfserr_clid_inuse; @@ -2072,10 +2481,11 @@ nfsd4_create_session(struct svc_rqst *rqstp, } old = find_confirmed_client_by_name(&unconf->cl_name, nn); if (old) { - status = mark_client_expired(old); - if (status) + status = mark_client_expired_locked(old); + if (status) { + old = NULL; goto out_free_conn; - expire_client(old); + } } move_to_confirmed(unconf); conf = unconf; @@ -2091,20 +2501,27 @@ nfsd4_create_session(struct svc_rqst *rqstp, cr_ses->flags &= ~SESSION4_RDMA; init_session(rqstp, new, conf, cr_ses); - nfsd4_init_conn(rqstp, conn, new); + nfsd4_get_session_locked(new); memcpy(cr_ses->sessionid.data, new->se_sessionid.data, NFS4_MAX_SESSIONID_LEN); cs_slot->sl_seqid++; cr_ses->seqid = cs_slot->sl_seqid; - /* cache solo and embedded create sessions under the state lock */ + /* cache solo and embedded create sessions under the client_lock */ nfsd4_cache_create_session(cr_ses, cs_slot, status); - nfs4_unlock_state(); + spin_unlock(&nn->client_lock); + /* init connection and backchannel */ + nfsd4_init_conn(rqstp, conn, new); + nfsd4_put_session(new); + if (old) + expire_client(old); return status; out_free_conn: - nfs4_unlock_state(); + spin_unlock(&nn->client_lock); free_conn(conn); + if (old) + expire_client(old); out_free_session: __free_session(new); out_release_drc_mem: @@ -2152,17 +2569,16 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, __be32 status; struct nfsd4_conn *conn; struct nfsd4_session *session; - struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + struct net *net = SVC_NET(rqstp); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); if (!nfsd4_last_compound_op(rqstp)) 
return nfserr_not_only_op; - nfs4_lock_state(); spin_lock(&nn->client_lock); - session = find_in_sessionid_hashtbl(&bcts->sessionid, SVC_NET(rqstp)); + session = find_in_sessionid_hashtbl(&bcts->sessionid, net, &status); spin_unlock(&nn->client_lock); - status = nfserr_badsession; if (!session) - goto out; + goto out_no_session; status = nfserr_wrong_cred; if (!mach_creds_match(session->se_client, rqstp)) goto out; @@ -2176,7 +2592,8 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, nfsd4_init_conn(rqstp, conn, session); status = nfs_ok; out: - nfs4_unlock_state(); + nfsd4_put_session(session); +out_no_session: return status; } @@ -2195,9 +2612,9 @@ nfsd4_destroy_session(struct svc_rqst *r, struct nfsd4_session *ses; __be32 status; int ref_held_by_me = 0; - struct nfsd_net *nn = net_generic(SVC_NET(r), nfsd_net_id); + struct net *net = SVC_NET(r); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); - nfs4_lock_state(); status = nfserr_not_only_op; if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) { if (!nfsd4_last_compound_op(r)) @@ -2206,14 +2623,12 @@ nfsd4_destroy_session(struct svc_rqst *r, } dump_sessionid(__func__, &sessionid->sessionid); spin_lock(&nn->client_lock); - ses = find_in_sessionid_hashtbl(&sessionid->sessionid, SVC_NET(r)); - status = nfserr_badsession; + ses = find_in_sessionid_hashtbl(&sessionid->sessionid, net, &status); if (!ses) goto out_client_lock; status = nfserr_wrong_cred; if (!mach_creds_match(ses->se_client, r)) - goto out_client_lock; - nfsd4_get_session_locked(ses); + goto out_put_session; status = mark_session_dead_locked(ses, 1 + ref_held_by_me); if (status) goto out_put_session; @@ -2225,11 +2640,10 @@ nfsd4_destroy_session(struct svc_rqst *r, spin_lock(&nn->client_lock); status = nfs_ok; out_put_session: - nfsd4_put_session(ses); + nfsd4_put_session_locked(ses); out_client_lock: spin_unlock(&nn->client_lock); out: - nfs4_unlock_state(); return status; } @@ -2300,7 +2714,8 @@ 
nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_conn *conn; __be32 status; int buflen; - struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + struct net *net = SVC_NET(rqstp); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); if (resp->opcnt != 1) return nfserr_sequence_pos; @@ -2314,17 +2729,10 @@ nfsd4_sequence(struct svc_rqst *rqstp, return nfserr_jukebox; spin_lock(&nn->client_lock); - status = nfserr_badsession; - session = find_in_sessionid_hashtbl(&seq->sessionid, SVC_NET(rqstp)); + session = find_in_sessionid_hashtbl(&seq->sessionid, net, &status); if (!session) goto out_no_session; clp = session->se_client; - status = get_client_locked(clp); - if (status) - goto out_no_session; - status = nfsd4_get_session_locked(session); - if (status) - goto out_put_client; status = nfserr_too_many_ops; if (nfsd4_session_too_many_ops(rqstp, session)) @@ -2354,6 +2762,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, goto out_put_session; cstate->slot = slot; cstate->session = session; + cstate->clp = clp; /* Return the cached reply status and set cstate->status * for nfsd4_proc_compound processing */ status = nfsd4_replay_cache_entry(resp, seq); @@ -2388,6 +2797,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, cstate->slot = slot; cstate->session = session; + cstate->clp = clp; out: switch (clp->cl_cb_state) { @@ -2408,31 +2818,48 @@ out_no_session: spin_unlock(&nn->client_lock); return status; out_put_session: - nfsd4_put_session(session); -out_put_client: - put_client_renew_locked(clp); + nfsd4_put_session_locked(session); goto out_no_session; } +void +nfsd4_sequence_done(struct nfsd4_compoundres *resp) +{ + struct nfsd4_compound_state *cs = &resp->cstate; + + if (nfsd4_has_session(cs)) { + if (cs->status != nfserr_replay_cache) { + nfsd4_store_cache_entry(resp); + cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE; + } + /* Drop session reference that was taken in nfsd4_sequence() */ + nfsd4_put_session(cs->session); + } else if (cs->clp) + put_client_renew(cs->clp); 
+} + __be32 nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_destroy_clientid *dc) { - struct nfs4_client *conf, *unconf, *clp; + struct nfs4_client *conf, *unconf; + struct nfs4_client *clp = NULL; __be32 status = 0; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - nfs4_lock_state(); + spin_lock(&nn->client_lock); unconf = find_unconfirmed_client(&dc->clientid, true, nn); conf = find_confirmed_client(&dc->clientid, true, nn); WARN_ON_ONCE(conf && unconf); if (conf) { - clp = conf; - if (client_has_state(conf)) { status = nfserr_clientid_busy; goto out; } + status = mark_client_expired_locked(conf); + if (status) + goto out; + clp = conf; } else if (unconf) clp = unconf; else { @@ -2440,12 +2867,15 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta goto out; } if (!mach_creds_match(clp, rqstp)) { + clp = NULL; status = nfserr_wrong_cred; goto out; } - expire_client(clp); + unhash_client_locked(clp); out: - nfs4_unlock_state(); + spin_unlock(&nn->client_lock); + if (clp) + expire_client(clp); return status; } @@ -2464,7 +2894,6 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta return nfs_ok; } - nfs4_lock_state(); status = nfserr_complete_already; if (test_and_set_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &cstate->session->se_client->cl_flags)) @@ -2484,7 +2913,6 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta status = nfs_ok; nfsd4_client_record_create(cstate->session->se_client); out: - nfs4_unlock_state(); return status; } @@ -2494,12 +2922,16 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { struct xdr_netobj clname = setclid->se_name; nfs4_verifier clverifier = setclid->se_verf; - struct nfs4_client *conf, *unconf, *new; + struct nfs4_client *conf, *new; + struct nfs4_client *unconf = NULL; __be32 status; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + 
new = create_client(clname, rqstp, &clverifier); + if (new == NULL) + return nfserr_jukebox; /* Cases below refer to rfc 3530 section 14.2.33: */ - nfs4_lock_state(); + spin_lock(&nn->client_lock); conf = find_confirmed_client_by_name(&clname, nn); if (conf) { /* case 0: */ @@ -2517,11 +2949,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } unconf = find_unconfirmed_client_by_name(&clname, nn); if (unconf) - expire_client(unconf); - status = nfserr_jukebox; - new = create_client(clname, rqstp, &clverifier); - if (new == NULL) - goto out; + unhash_client_locked(unconf); if (conf && same_verf(&conf->cl_verifier, &clverifier)) /* case 1: probable callback update */ copy_clid(new, conf); @@ -2533,9 +2961,14 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; setclid->se_clientid.cl_id = new->cl_clientid.cl_id; memcpy(setclid->se_confirm.data, new->cl_confirm.data, sizeof(setclid->se_confirm.data)); + new = NULL; status = nfs_ok; out: - nfs4_unlock_state(); + spin_unlock(&nn->client_lock); + if (new) + free_client(new); + if (unconf) + expire_client(unconf); return status; } @@ -2546,6 +2979,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confirm *setclientid_confirm) { struct nfs4_client *conf, *unconf; + struct nfs4_client *old = NULL; nfs4_verifier confirm = setclientid_confirm->sc_confirm; clientid_t * clid = &setclientid_confirm->sc_clientid; __be32 status; @@ -2553,8 +2987,8 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, if (STALE_CLIENTID(clid, nn)) return nfserr_stale_clientid; - nfs4_lock_state(); + spin_lock(&nn->client_lock); conf = find_confirmed_client(clid, false, nn); unconf = find_unconfirmed_client(clid, false, nn); /* @@ -2578,22 +3012,30 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, } status = nfs_ok; if (conf) { /* case 1: callback update */ + old = unconf; + unhash_client_locked(old); 
nfsd4_change_callback(conf, &unconf->cl_cb_conn); - nfsd4_probe_callback(conf); - expire_client(unconf); } else { /* case 3: normal case; new or rebooted client */ - conf = find_confirmed_client_by_name(&unconf->cl_name, nn); - if (conf) { - status = mark_client_expired(conf); - if (status) + old = find_confirmed_client_by_name(&unconf->cl_name, nn); + if (old) { + status = mark_client_expired_locked(old); + if (status) { + old = NULL; goto out; - expire_client(conf); + } } move_to_confirmed(unconf); - nfsd4_probe_callback(unconf); + conf = unconf; } + get_client_locked(conf); + spin_unlock(&nn->client_lock); + nfsd4_probe_callback(conf); + spin_lock(&nn->client_lock); + put_client_renew_locked(conf); out: - nfs4_unlock_state(); + spin_unlock(&nn->client_lock); + if (old) + expire_client(old); return status; } @@ -2603,21 +3045,23 @@ static struct nfs4_file *nfsd4_alloc_file(void) } /* OPEN Share state helper functions */ -static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino) +static void nfsd4_init_file(struct nfs4_file *fp, struct knfsd_fh *fh) { - unsigned int hashval = file_hashval(ino); + unsigned int hashval = file_hashval(fh); + + lockdep_assert_held(&state_lock); atomic_set(&fp->fi_ref, 1); + spin_lock_init(&fp->fi_lock); INIT_LIST_HEAD(&fp->fi_stateids); INIT_LIST_HEAD(&fp->fi_delegations); - fp->fi_inode = igrab(ino); + fh_copy_shallow(&fp->fi_fhandle, fh); fp->fi_had_conflict = false; fp->fi_lease = NULL; + fp->fi_share_deny = 0; memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); memset(fp->fi_access, 0, sizeof(fp->fi_access)); - spin_lock(&state_lock); hlist_add_head(&fp->fi_hash, &file_hashtbl[hashval]); - spin_unlock(&state_lock); } void @@ -2673,6 +3117,28 @@ static void init_nfs4_replay(struct nfs4_replay *rp) rp->rp_status = nfserr_serverfault; rp->rp_buflen = 0; rp->rp_buf = rp->rp_ibuf; + mutex_init(&rp->rp_mutex); +} + +static void nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate, + struct nfs4_stateowner *so) +{ + if 
(!nfsd4_has_session(cstate)) { + mutex_lock(&so->so_replay.rp_mutex); + cstate->replay_owner = so; + atomic_inc(&so->so_count); + } +} + +void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate) +{ + struct nfs4_stateowner *so = cstate->replay_owner; + + if (so != NULL) { + cstate->replay_owner = NULL; + mutex_unlock(&so->so_replay.rp_mutex); + nfs4_put_stateowner(so); + } } static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj *owner, struct nfs4_client *clp) @@ -2693,111 +3159,172 @@ static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj INIT_LIST_HEAD(&sop->so_stateids); sop->so_client = clp; init_nfs4_replay(&sop->so_replay); + atomic_set(&sop->so_count, 1); return sop; } static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, unsigned int strhashval) { - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + lockdep_assert_held(&clp->cl_lock); - list_add(&oo->oo_owner.so_strhash, &nn->ownerstr_hashtbl[strhashval]); + list_add(&oo->oo_owner.so_strhash, + &clp->cl_ownerstr_hashtbl[strhashval]); list_add(&oo->oo_perclient, &clp->cl_openowners); } +static void nfs4_unhash_openowner(struct nfs4_stateowner *so) +{ + unhash_openowner_locked(openowner(so)); +} + +static void nfs4_free_openowner(struct nfs4_stateowner *so) +{ + struct nfs4_openowner *oo = openowner(so); + + kmem_cache_free(openowner_slab, oo); +} + +static const struct nfs4_stateowner_operations openowner_ops = { + .so_unhash = nfs4_unhash_openowner, + .so_free = nfs4_free_openowner, +}; + static struct nfs4_openowner * -alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfsd4_open *open) { - struct nfs4_openowner *oo; +alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, + struct nfsd4_compound_state *cstate) +{ + struct nfs4_client *clp = cstate->clp; + struct nfs4_openowner *oo, *ret; oo = alloc_stateowner(openowner_slab, &open->op_owner, clp); if (!oo) 
return NULL; + oo->oo_owner.so_ops = &openowner_ops; oo->oo_owner.so_is_open_owner = 1; oo->oo_owner.so_seqid = open->op_seqid; - oo->oo_flags = NFS4_OO_NEW; + oo->oo_flags = 0; + if (nfsd4_has_session(cstate)) + oo->oo_flags |= NFS4_OO_CONFIRMED; oo->oo_time = 0; oo->oo_last_closed_stid = NULL; INIT_LIST_HEAD(&oo->oo_close_lru); - hash_openowner(oo, clp, strhashval); + spin_lock(&clp->cl_lock); + ret = find_openstateowner_str_locked(strhashval, open, clp); + if (ret == NULL) { + hash_openowner(oo, clp, strhashval); + ret = oo; + } else + nfs4_free_openowner(&oo->oo_owner); + spin_unlock(&clp->cl_lock); return oo; } static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { struct nfs4_openowner *oo = open->op_openowner; + atomic_inc(&stp->st_stid.sc_count); stp->st_stid.sc_type = NFS4_OPEN_STID; - INIT_LIST_HEAD(&stp->st_lockowners); - list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); - list_add(&stp->st_perfile, &fp->fi_stateids); + INIT_LIST_HEAD(&stp->st_locks); stp->st_stateowner = &oo->oo_owner; + atomic_inc(&stp->st_stateowner->so_count); get_nfs4_file(fp); - stp->st_file = fp; + stp->st_stid.sc_file = fp; stp->st_access_bmap = 0; stp->st_deny_bmap = 0; - set_access(open->op_share_access, stp); - set_deny(open->op_share_deny, stp); stp->st_openstp = NULL; + spin_lock(&oo->oo_owner.so_client->cl_lock); + list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); + spin_lock(&fp->fi_lock); + list_add(&stp->st_perfile, &fp->fi_stateids); + spin_unlock(&fp->fi_lock); + spin_unlock(&oo->oo_owner.so_client->cl_lock); } +/* + * In the 4.0 case we need to keep the owners around a little while to handle + * CLOSE replay. We still do need to release any file access that is held by + * them before returning however. 
+ */ static void -move_to_close_lru(struct nfs4_openowner *oo, struct net *net) +move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net) { - struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct nfs4_ol_stateid *last; + struct nfs4_openowner *oo = openowner(s->st_stateowner); + struct nfsd_net *nn = net_generic(s->st_stid.sc_client->net, + nfsd_net_id); dprintk("NFSD: move_to_close_lru nfs4_openowner %p\n", oo); + /* + * We know that we hold one reference via nfsd4_close, and another + * "persistent" reference for the client. If the refcount is higher + * than 2, then there are still calls in progress that are using this + * stateid. We can't put the sc_file reference until they are finished. + * Wait for the refcount to drop to 2. Since it has been unhashed, + * there should be no danger of the refcount going back up again at + * this point. + */ + wait_event(close_wq, atomic_read(&s->st_stid.sc_count) == 2); + + release_all_access(s); + if (s->st_stid.sc_file) { + put_nfs4_file(s->st_stid.sc_file); + s->st_stid.sc_file = NULL; + } + + spin_lock(&nn->client_lock); + last = oo->oo_last_closed_stid; + oo->oo_last_closed_stid = s; list_move_tail(&oo->oo_close_lru, &nn->close_lru); oo->oo_time = get_seconds(); + spin_unlock(&nn->client_lock); + if (last) + nfs4_put_stid(&last->st_stid); } -static int -same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner, - clientid_t *clid) +/* search file_hashtbl[] for file */ +static struct nfs4_file * +find_file_locked(struct knfsd_fh *fh) { - return (sop->so_owner.len == owner->len) && - 0 == memcmp(sop->so_owner.data, owner->data, owner->len) && - (sop->so_client->cl_clientid.cl_id == clid->cl_id); -} + unsigned int hashval = file_hashval(fh); + struct nfs4_file *fp; -static struct nfs4_openowner * -find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open, - bool sessions, struct nfsd_net *nn) -{ - struct nfs4_stateowner *so; - struct nfs4_openowner *oo; - struct nfs4_client *clp; + 
lockdep_assert_held(&state_lock); - list_for_each_entry(so, &nn->ownerstr_hashtbl[hashval], so_strhash) { - if (!so->so_is_open_owner) - continue; - if (same_owner_str(so, &open->op_owner, &open->op_clientid)) { - oo = openowner(so); - clp = oo->oo_owner.so_client; - if ((bool)clp->cl_minorversion != sessions) - return NULL; - renew_client(oo->oo_owner.so_client); - return oo; + hlist_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) { + if (nfsd_fh_match(&fp->fi_fhandle, fh)) { + get_nfs4_file(fp); + return fp; } } return NULL; } -/* search file_hashtbl[] for file */ static struct nfs4_file * -find_file(struct inode *ino) +find_file(struct knfsd_fh *fh) { - unsigned int hashval = file_hashval(ino); struct nfs4_file *fp; spin_lock(&state_lock); - hlist_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) { - if (fp->fi_inode == ino) { - get_nfs4_file(fp); - spin_unlock(&state_lock); - return fp; - } + fp = find_file_locked(fh); + spin_unlock(&state_lock); + return fp; +} + +static struct nfs4_file * +find_or_add_file(struct nfs4_file *new, struct knfsd_fh *fh) +{ + struct nfs4_file *fp; + + spin_lock(&state_lock); + fp = find_file_locked(fh); + if (fp == NULL) { + nfsd4_init_file(new, fh); + fp = new; } spin_unlock(&state_lock); - return NULL; + + return fp; } /* @@ -2807,47 +3334,53 @@ find_file(struct inode *ino) static __be32 nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) { - struct inode *ino = current_fh->fh_dentry->d_inode; struct nfs4_file *fp; - struct nfs4_ol_stateid *stp; - __be32 ret; + __be32 ret = nfs_ok; - fp = find_file(ino); + fp = find_file(&current_fh->fh_handle); if (!fp) - return nfs_ok; - ret = nfserr_locked; - /* Search for conflicting share reservations */ - list_for_each_entry(stp, &fp->fi_stateids, st_perfile) { - if (test_deny(deny_type, stp) || - test_deny(NFS4_SHARE_DENY_BOTH, stp)) - goto out; - } - ret = nfs_ok; -out: + return ret; + /* Check for conflicting share reservations */ + spin_lock(&fp->fi_lock); + if 
(fp->fi_share_deny & deny_type) + ret = nfserr_locked; + spin_unlock(&fp->fi_lock); put_nfs4_file(fp); return ret; } -static void nfsd_break_one_deleg(struct nfs4_delegation *dp) +void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp) { - struct nfs4_client *clp = dp->dl_stid.sc_client; - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + struct nfsd_net *nn = net_generic(dp->dl_stid.sc_client->net, + nfsd_net_id); - lockdep_assert_held(&state_lock); - /* We're assuming the state code never drops its reference + block_delegations(&dp->dl_stid.sc_file->fi_fhandle); + + /* + * We can't do this in nfsd_break_deleg_cb because it is + * already holding inode->i_lock. + * + * If the dl_time != 0, then we know that it has already been + * queued for a lease break. Don't queue it again. + */ + spin_lock(&state_lock); + if (dp->dl_time == 0) { + dp->dl_time = get_seconds(); + list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru); + } + spin_unlock(&state_lock); +} + +static void nfsd_break_one_deleg(struct nfs4_delegation *dp) +{ + /* + * We're assuming the state code never drops its reference * without first removing the lease. Since we're in this lease * callback (and since the lease code is serialized by the kernel * lock) we know the server hasn't removed the lease yet, we know - * it's safe to take a reference: */ - atomic_inc(&dp->dl_count); - - list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru); - - /* Only place dl_time is set; protected by i_lock: */ - dp->dl_time = get_seconds(); - - block_delegations(&dp->dl_fh); - + * it's safe to take a reference. 
+ */ + atomic_inc(&dp->dl_stid.sc_count); nfsd4_cb_recall(dp); } @@ -2872,11 +3405,20 @@ static void nfsd_break_deleg_cb(struct file_lock *fl) */ fl->fl_break_time = 0; - spin_lock(&state_lock); + spin_lock(&fp->fi_lock); fp->fi_had_conflict = true; - list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) - nfsd_break_one_deleg(dp); - spin_unlock(&state_lock); + /* + * If there are no delegations on the list, then we can't count on this + * lease ever being cleaned up. Set the fl_break_time to jiffies so that + * time_out_leases will do it ASAP. The fact that fi_had_conflict is now + * true should keep any new delegations from being hashed. + */ + if (list_empty(&fp->fi_delegations)) + fl->fl_break_time = jiffies; + else + list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) + nfsd_break_one_deleg(dp); + spin_unlock(&fp->fi_lock); } static @@ -2904,6 +3446,42 @@ static __be32 nfsd4_check_seqid(struct nfsd4_compound_state *cstate, struct nfs4 return nfserr_bad_seqid; } +static __be32 lookup_clientid(clientid_t *clid, + struct nfsd4_compound_state *cstate, + struct nfsd_net *nn) +{ + struct nfs4_client *found; + + if (cstate->clp) { + found = cstate->clp; + if (!same_clid(&found->cl_clientid, clid)) + return nfserr_stale_clientid; + return nfs_ok; + } + + if (STALE_CLIENTID(clid, nn)) + return nfserr_stale_clientid; + + /* + * For v4.1+ we get the client in the SEQUENCE op. If we don't have one + * cached already then we know this is for is for v4.0 and "sessions" + * will be false. + */ + WARN_ON_ONCE(cstate->session); + spin_lock(&nn->client_lock); + found = find_confirmed_client(clid, false, nn); + if (!found) { + spin_unlock(&nn->client_lock); + return nfserr_expired; + } + atomic_inc(&found->cl_refcount); + spin_unlock(&nn->client_lock); + + /* Cache the nfs4_client in cstate! 
*/ + cstate->clp = found; + return nfs_ok; +} + __be32 nfsd4_process_open1(struct nfsd4_compound_state *cstate, struct nfsd4_open *open, struct nfsd_net *nn) @@ -2924,19 +3502,19 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, if (open->op_file == NULL) return nfserr_jukebox; - strhashval = ownerstr_hashval(clientid->cl_id, &open->op_owner); - oo = find_openstateowner_str(strhashval, open, cstate->minorversion, nn); + status = lookup_clientid(clientid, cstate, nn); + if (status) + return status; + clp = cstate->clp; + + strhashval = ownerstr_hashval(&open->op_owner); + oo = find_openstateowner_str(strhashval, open, clp); open->op_openowner = oo; if (!oo) { - clp = find_confirmed_client(clientid, cstate->minorversion, - nn); - if (clp == NULL) - return nfserr_expired; goto new_owner; } if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { /* Replace unconfirmed owners without checking for replay. */ - clp = oo->oo_owner.so_client; release_openowner(oo); open->op_openowner = NULL; goto new_owner; @@ -2944,15 +3522,14 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, status = nfsd4_check_seqid(cstate, &oo->oo_owner, open->op_seqid); if (status) return status; - clp = oo->oo_owner.so_client; goto alloc_stateid; new_owner: - oo = alloc_init_open_stateowner(strhashval, clp, open); + oo = alloc_init_open_stateowner(strhashval, open, cstate); if (oo == NULL) return nfserr_jukebox; open->op_openowner = oo; alloc_stateid: - open->op_stp = nfs4_alloc_stateid(clp); + open->op_stp = nfs4_alloc_open_stateid(clp); if (!open->op_stp) return nfserr_jukebox; return nfs_ok; @@ -2994,14 +3571,18 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open, { int flags; __be32 status = nfserr_bad_stateid; + struct nfs4_delegation *deleg; - *dp = find_deleg_stateid(cl, &open->op_delegate_stateid); - if (*dp == NULL) + deleg = find_deleg_stateid(cl, &open->op_delegate_stateid); + if (deleg == NULL) goto out; flags = share_access_to_flags(open->op_share_access); - status 
= nfs4_check_delegmode(*dp, flags); - if (status) - *dp = NULL; + status = nfs4_check_delegmode(deleg, flags); + if (status) { + nfs4_put_stid(&deleg->dl_stid); + goto out; + } + *dp = deleg; out: if (!nfsd4_is_deleg_cur(open)) return nfs_ok; @@ -3011,24 +3592,25 @@ out: return nfs_ok; } -static __be32 -nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_ol_stateid **stpp) +static struct nfs4_ol_stateid * +nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) { - struct nfs4_ol_stateid *local; + struct nfs4_ol_stateid *local, *ret = NULL; struct nfs4_openowner *oo = open->op_openowner; + spin_lock(&fp->fi_lock); list_for_each_entry(local, &fp->fi_stateids, st_perfile) { /* ignore lock owners */ if (local->st_stateowner->so_is_open_owner == 0) continue; - /* remember if we have seen this open owner */ - if (local->st_stateowner == &oo->oo_owner) - *stpp = local; - /* check for conflicting share reservations */ - if (!test_share(local, open)) - return nfserr_share_denied; + if (local->st_stateowner == &oo->oo_owner) { + ret = local; + atomic_inc(&ret->st_stid.sc_count); + break; + } } - return nfs_ok; + spin_unlock(&fp->fi_lock); + return ret; } static inline int nfs4_access_to_access(u32 nfs4_access) @@ -3042,24 +3624,6 @@ static inline int nfs4_access_to_access(u32 nfs4_access) return flags; } -static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, - struct svc_fh *cur_fh, struct nfsd4_open *open) -{ - __be32 status; - int oflag = nfs4_access_to_omode(open->op_share_access); - int access = nfs4_access_to_access(open->op_share_access); - - if (!fp->fi_fds[oflag]) { - status = nfsd_open(rqstp, cur_fh, S_IFREG, access, - &fp->fi_fds[oflag]); - if (status) - return status; - } - nfs4_file_get_access(fp, oflag); - - return nfs_ok; -} - static inline __be32 nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh, struct nfsd4_open *open) @@ -3075,34 +3639,99 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct 
svc_fh *fh, return nfsd_setattr(rqstp, fh, &iattr, 0, (time_t)0); } -static __be32 -nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open) +static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, + struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, + struct nfsd4_open *open) { - u32 op_share_access = open->op_share_access; - bool new_access; + struct file *filp = NULL; __be32 status; + int oflag = nfs4_access_to_omode(open->op_share_access); + int access = nfs4_access_to_access(open->op_share_access); + unsigned char old_access_bmap, old_deny_bmap; - new_access = !test_access(op_share_access, stp); - if (new_access) { - status = nfs4_get_vfs_file(rqstp, fp, cur_fh, open); - if (status) - return status; + spin_lock(&fp->fi_lock); + + /* + * Are we trying to set a deny mode that would conflict with + * current access? + */ + status = nfs4_file_check_deny(fp, open->op_share_deny); + if (status != nfs_ok) { + spin_unlock(&fp->fi_lock); + goto out; } - status = nfsd4_truncate(rqstp, cur_fh, open); - if (status) { - if (new_access) { - int oflag = nfs4_access_to_omode(op_share_access); - nfs4_file_put_access(fp, oflag); - } - return status; + + /* set access to the file */ + status = nfs4_file_get_access(fp, open->op_share_access); + if (status != nfs_ok) { + spin_unlock(&fp->fi_lock); + goto out; } - /* remember the open */ - set_access(op_share_access, stp); + + /* Set access bits in stateid */ + old_access_bmap = stp->st_access_bmap; + set_access(open->op_share_access, stp); + + /* Set new deny mask */ + old_deny_bmap = stp->st_deny_bmap; set_deny(open->op_share_deny, stp); + fp->fi_share_deny |= (open->op_share_deny & NFS4_SHARE_DENY_BOTH); - return nfs_ok; + if (!fp->fi_fds[oflag]) { + spin_unlock(&fp->fi_lock); + status = nfsd_open(rqstp, cur_fh, S_IFREG, access, &filp); + if (status) + goto out_put_access; + spin_lock(&fp->fi_lock); + if 
(!fp->fi_fds[oflag]) { + fp->fi_fds[oflag] = filp; + filp = NULL; + } + } + spin_unlock(&fp->fi_lock); + if (filp) + fput(filp); + + status = nfsd4_truncate(rqstp, cur_fh, open); + if (status) + goto out_put_access; +out: + return status; +out_put_access: + stp->st_access_bmap = old_access_bmap; + nfs4_file_put_access(fp, open->op_share_access); + reset_union_bmap_deny(bmap_to_share_mode(old_deny_bmap), stp); + goto out; } +static __be32 +nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open) +{ + __be32 status; + unsigned char old_deny_bmap; + + if (!test_access(open->op_share_access, stp)) + return nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open); + + /* test and set deny mode */ + spin_lock(&fp->fi_lock); + status = nfs4_file_check_deny(fp, open->op_share_deny); + if (status == nfs_ok) { + old_deny_bmap = stp->st_deny_bmap; + set_deny(open->op_share_deny, stp); + fp->fi_share_deny |= + (open->op_share_deny & NFS4_SHARE_DENY_BOTH); + } + spin_unlock(&fp->fi_lock); + + if (status != nfs_ok) + return status; + + status = nfsd4_truncate(rqstp, cur_fh, open); + if (status != nfs_ok) + reset_union_bmap_deny(old_deny_bmap, stp); + return status; +} static void nfs4_set_claim_prev(struct nfsd4_open *open, bool has_session) @@ -3123,7 +3752,7 @@ static bool nfsd4_cb_channel_good(struct nfs4_client *clp) return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN; } -static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int flag) +static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag) { struct file_lock *fl; @@ -3135,53 +3764,101 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int f fl->fl_flags = FL_DELEG; fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? 
F_RDLCK: F_WRLCK; fl->fl_end = OFFSET_MAX; - fl->fl_owner = (fl_owner_t)(dp->dl_file); + fl->fl_owner = (fl_owner_t)fp; fl->fl_pid = current->tgid; return fl; } static int nfs4_setlease(struct nfs4_delegation *dp) { - struct nfs4_file *fp = dp->dl_file; + struct nfs4_file *fp = dp->dl_stid.sc_file; struct file_lock *fl; - int status; + struct file *filp; + int status = 0; - fl = nfs4_alloc_init_lease(dp, NFS4_OPEN_DELEGATE_READ); + fl = nfs4_alloc_init_lease(fp, NFS4_OPEN_DELEGATE_READ); if (!fl) return -ENOMEM; - fl->fl_file = find_readable_file(fp); - status = vfs_setlease(fl->fl_file, fl->fl_type, &fl); - if (status) - goto out_free; + filp = find_readable_file(fp); + if (!filp) { + /* We should always have a readable file here */ + WARN_ON_ONCE(1); + return -EBADF; + } + fl->fl_file = filp; + status = vfs_setlease(filp, fl->fl_type, &fl); + if (status) { + locks_free_lock(fl); + goto out_fput; + } + spin_lock(&state_lock); + spin_lock(&fp->fi_lock); + /* Did the lease get broken before we took the lock? 
*/ + status = -EAGAIN; + if (fp->fi_had_conflict) + goto out_unlock; + /* Race breaker */ + if (fp->fi_lease) { + status = 0; + atomic_inc(&fp->fi_delegees); + hash_delegation_locked(dp, fp); + goto out_unlock; + } fp->fi_lease = fl; - fp->fi_deleg_file = get_file(fl->fl_file); + fp->fi_deleg_file = filp; atomic_set(&fp->fi_delegees, 1); - spin_lock(&state_lock); hash_delegation_locked(dp, fp); + spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); return 0; -out_free: - locks_free_lock(fl); +out_unlock: + spin_unlock(&fp->fi_lock); + spin_unlock(&state_lock); +out_fput: + fput(filp); return status; } -static int nfs4_set_delegation(struct nfs4_delegation *dp, struct nfs4_file *fp) +static struct nfs4_delegation * +nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, + struct nfs4_file *fp) { + int status; + struct nfs4_delegation *dp; + if (fp->fi_had_conflict) - return -EAGAIN; + return ERR_PTR(-EAGAIN); + + dp = alloc_init_deleg(clp, fh); + if (!dp) + return ERR_PTR(-ENOMEM); + get_nfs4_file(fp); - dp->dl_file = fp; - if (!fp->fi_lease) - return nfs4_setlease(dp); spin_lock(&state_lock); + spin_lock(&fp->fi_lock); + dp->dl_stid.sc_file = fp; + if (!fp->fi_lease) { + spin_unlock(&fp->fi_lock); + spin_unlock(&state_lock); + status = nfs4_setlease(dp); + goto out; + } atomic_inc(&fp->fi_delegees); if (fp->fi_had_conflict) { - spin_unlock(&state_lock); - return -EAGAIN; + status = -EAGAIN; + goto out_unlock; } hash_delegation_locked(dp, fp); + status = 0; +out_unlock: + spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); - return 0; +out: + if (status) { + nfs4_put_stid(&dp->dl_stid); + return ERR_PTR(status); + } + return dp; } static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) @@ -3212,11 +3889,12 @@ static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) * proper support for them. 
*/ static void -nfs4_open_delegation(struct net *net, struct svc_fh *fh, - struct nfsd4_open *open, struct nfs4_ol_stateid *stp) +nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, + struct nfs4_ol_stateid *stp) { struct nfs4_delegation *dp; - struct nfs4_openowner *oo = container_of(stp->st_stateowner, struct nfs4_openowner, oo_owner); + struct nfs4_openowner *oo = openowner(stp->st_stateowner); + struct nfs4_client *clp = stp->st_stid.sc_client; int cb_up; int status = 0; @@ -3235,7 +3913,7 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh, * Let's not give out any delegations till everyone's * had the chance to reclaim theirs.... */ - if (locks_in_grace(net)) + if (locks_in_grace(clp->net)) goto out_no_deleg; if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED)) goto out_no_deleg; @@ -3254,21 +3932,17 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh, default: goto out_no_deleg; } - dp = alloc_init_deleg(oo->oo_owner.so_client, stp, fh); - if (dp == NULL) + dp = nfs4_set_delegation(clp, fh, stp->st_stid.sc_file); + if (IS_ERR(dp)) goto out_no_deleg; - status = nfs4_set_delegation(dp, stp->st_file); - if (status) - goto out_free; memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid)); dprintk("NFSD: delegation stateid=" STATEID_FMT "\n", STATEID_VAL(&dp->dl_stid.sc_stateid)); open->op_delegate_type = NFS4_OPEN_DELEGATE_READ; + nfs4_put_stid(&dp->dl_stid); return; -out_free: - destroy_delegation(dp); out_no_deleg: open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE; if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && @@ -3301,16 +3975,12 @@ static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open, */ } -/* - * called with nfs4_lock_state() held. 
- */ __be32 nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) { struct nfsd4_compoundres *resp = rqstp->rq_resp; struct nfs4_client *cl = open->op_openowner->oo_owner.so_client; struct nfs4_file *fp = NULL; - struct inode *ino = current_fh->fh_dentry->d_inode; struct nfs4_ol_stateid *stp = NULL; struct nfs4_delegation *dp = NULL; __be32 status; @@ -3320,21 +3990,18 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf * and check for delegations in the process of being recalled. * If not found, create the nfs4_file struct */ - fp = find_file(ino); - if (fp) { - if ((status = nfs4_check_open(fp, open, &stp))) - goto out; + fp = find_or_add_file(open->op_file, &current_fh->fh_handle); + if (fp != open->op_file) { status = nfs4_check_deleg(cl, open, &dp); if (status) goto out; + stp = nfsd4_find_existing_open(fp, open); } else { + open->op_file = NULL; status = nfserr_bad_stateid; if (nfsd4_is_deleg_cur(open)) goto out; status = nfserr_jukebox; - fp = open->op_file; - open->op_file = NULL; - nfsd4_init_file(fp, ino); } /* @@ -3347,22 +4014,19 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf if (status) goto out; } else { - status = nfs4_get_vfs_file(rqstp, fp, current_fh, open); - if (status) - goto out; - status = nfsd4_truncate(rqstp, current_fh, open); - if (status) - goto out; stp = open->op_stp; open->op_stp = NULL; init_open_stateid(stp, fp, open); + status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open); + if (status) { + release_open_stateid(stp); + goto out; + } } update_stateid(&stp->st_stid.sc_stateid); memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); if (nfsd4_has_session(&resp->cstate)) { - open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; - if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) { open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT; open->op_why_no_deleg = WND4_NOT_WANTED; @@ -3374,7 +4038,7 @@ 
nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf * Attempt to hand out a delegation. No error return, because the * OPEN succeeds even if we fail. */ - nfs4_open_delegation(SVC_NET(rqstp), current_fh, open, stp); + nfs4_open_delegation(current_fh, open, stp); nodeleg: status = nfs_ok; @@ -3397,41 +4061,27 @@ out: if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED) && !nfsd4_has_session(&resp->cstate)) open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM; + if (dp) + nfs4_put_stid(&dp->dl_stid); + if (stp) + nfs4_put_stid(&stp->st_stid); return status; } -void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status) +void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate, + struct nfsd4_open *open, __be32 status) { if (open->op_openowner) { - struct nfs4_openowner *oo = open->op_openowner; + struct nfs4_stateowner *so = &open->op_openowner->oo_owner; - if (!list_empty(&oo->oo_owner.so_stateids)) - list_del_init(&oo->oo_close_lru); - if (oo->oo_flags & NFS4_OO_NEW) { - if (status) { - release_openowner(oo); - open->op_openowner = NULL; - } else - oo->oo_flags &= ~NFS4_OO_NEW; - } + nfsd4_cstate_assign_replay(cstate, so); + nfs4_put_stateowner(so); } if (open->op_file) nfsd4_free_file(open->op_file); if (open->op_stp) - free_generic_stateid(open->op_stp); -} - -static __be32 lookup_clientid(clientid_t *clid, bool session, struct nfsd_net *nn, struct nfs4_client **clp) -{ - struct nfs4_client *found; - - if (STALE_CLIENTID(clid, nn)) - return nfserr_stale_clientid; - found = find_confirmed_client(clid, session, nn); - if (clp) - *clp = found; - return found ? 
nfs_ok : nfserr_expired; + nfs4_put_stid(&open->op_stp->st_stid); } __be32 @@ -3442,19 +4092,18 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __be32 status; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - nfs4_lock_state(); dprintk("process_renew(%08x/%08x): starting\n", clid->cl_boot, clid->cl_id); - status = lookup_clientid(clid, cstate->minorversion, nn, &clp); + status = lookup_clientid(clid, cstate, nn); if (status) goto out; + clp = cstate->clp; status = nfserr_cb_path_down; if (!list_empty(&clp->cl_delegations) && clp->cl_cb_state != NFSD4_CB_UP) goto out; status = nfs_ok; out: - nfs4_unlock_state(); return status; } @@ -3483,12 +4132,11 @@ nfs4_laundromat(struct nfsd_net *nn) struct nfs4_client *clp; struct nfs4_openowner *oo; struct nfs4_delegation *dp; + struct nfs4_ol_stateid *stp; struct list_head *pos, *next, reaplist; time_t cutoff = get_seconds() - nn->nfsd4_lease; time_t t, new_timeo = nn->nfsd4_lease; - nfs4_lock_state(); - dprintk("NFSD: laundromat service - starting\n"); nfsd4_end_grace(nn); INIT_LIST_HEAD(&reaplist); @@ -3505,13 +4153,14 @@ nfs4_laundromat(struct nfsd_net *nn) clp->cl_clientid.cl_id); continue; } - list_move(&clp->cl_lru, &reaplist); + list_add(&clp->cl_lru, &reaplist); } spin_unlock(&nn->client_lock); list_for_each_safe(pos, next, &reaplist) { clp = list_entry(pos, struct nfs4_client, cl_lru); dprintk("NFSD: purging unused client (clientid %08x)\n", clp->cl_clientid.cl_id); + list_del_init(&clp->cl_lru); expire_client(clp); } spin_lock(&state_lock); @@ -3524,24 +4173,37 @@ nfs4_laundromat(struct nfsd_net *nn) new_timeo = min(new_timeo, t); break; } - list_move(&dp->dl_recall_lru, &reaplist); + unhash_delegation_locked(dp); + list_add(&dp->dl_recall_lru, &reaplist); } spin_unlock(&state_lock); - list_for_each_safe(pos, next, &reaplist) { - dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); + while (!list_empty(&reaplist)) { + dp = list_first_entry(&reaplist, struct 
nfs4_delegation, + dl_recall_lru); + list_del_init(&dp->dl_recall_lru); revoke_delegation(dp); } - list_for_each_safe(pos, next, &nn->close_lru) { - oo = container_of(pos, struct nfs4_openowner, oo_close_lru); - if (time_after((unsigned long)oo->oo_time, (unsigned long)cutoff)) { + + spin_lock(&nn->client_lock); + while (!list_empty(&nn->close_lru)) { + oo = list_first_entry(&nn->close_lru, struct nfs4_openowner, + oo_close_lru); + if (time_after((unsigned long)oo->oo_time, + (unsigned long)cutoff)) { t = oo->oo_time - cutoff; new_timeo = min(new_timeo, t); break; } - release_openowner(oo); + list_del_init(&oo->oo_close_lru); + stp = oo->oo_last_closed_stid; + oo->oo_last_closed_stid = NULL; + spin_unlock(&nn->client_lock); + nfs4_put_stid(&stp->st_stid); + spin_lock(&nn->client_lock); } + spin_unlock(&nn->client_lock); + new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); - nfs4_unlock_state(); return new_timeo; } @@ -3564,7 +4226,7 @@ laundromat_main(struct work_struct *laundry) static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *stp) { - if (fhp->fh_dentry->d_inode != stp->st_file->fi_inode) + if (!nfsd_fh_match(&fhp->fh_handle, &stp->st_stid.sc_file->fi_fhandle)) return nfserr_bad_stateid; return nfs_ok; } @@ -3666,10 +4328,10 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) { struct nfs4_stid *s; struct nfs4_ol_stateid *ols; - __be32 status; + __be32 status = nfserr_bad_stateid; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) - return nfserr_bad_stateid; + return status; /* Client debugging aid. 
*/ if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid)) { char addr_str[INET6_ADDRSTRLEN]; @@ -3677,53 +4339,62 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) sizeof(addr_str)); pr_warn_ratelimited("NFSD: client %s testing state ID " "with incorrect client ID\n", addr_str); - return nfserr_bad_stateid; + return status; } - s = find_stateid(cl, stateid); + spin_lock(&cl->cl_lock); + s = find_stateid_locked(cl, stateid); if (!s) - return nfserr_bad_stateid; + goto out_unlock; status = check_stateid_generation(stateid, &s->sc_stateid, 1); if (status) - return status; + goto out_unlock; switch (s->sc_type) { case NFS4_DELEG_STID: - return nfs_ok; + status = nfs_ok; + break; case NFS4_REVOKED_DELEG_STID: - return nfserr_deleg_revoked; + status = nfserr_deleg_revoked; + break; case NFS4_OPEN_STID: case NFS4_LOCK_STID: ols = openlockstateid(s); if (ols->st_stateowner->so_is_open_owner && !(openowner(ols->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED)) - return nfserr_bad_stateid; - return nfs_ok; + status = nfserr_bad_stateid; + else + status = nfs_ok; + break; default: printk("unknown stateid type %x\n", s->sc_type); + /* Fallthrough */ case NFS4_CLOSED_STID: - return nfserr_bad_stateid; + case NFS4_CLOSED_DELEG_STID: + status = nfserr_bad_stateid; } +out_unlock: + spin_unlock(&cl->cl_lock); + return status; } -static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask, - struct nfs4_stid **s, bool sessions, - struct nfsd_net *nn) +static __be32 +nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, + stateid_t *stateid, unsigned char typemask, + struct nfs4_stid **s, struct nfsd_net *nn) { - struct nfs4_client *cl; __be32 status; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) return nfserr_bad_stateid; - status = lookup_clientid(&stateid->si_opaque.so_clid, sessions, - nn, &cl); + status = lookup_clientid(&stateid->si_opaque.so_clid, cstate, nn); if (status == nfserr_stale_clientid) { - if 
(sessions) + if (cstate->session) return nfserr_bad_stateid; return nfserr_stale_stateid; } if (status) return status; - *s = find_stateid_by_type(cl, stateid, typemask); + *s = find_stateid_by_type(cstate->clp, stateid, typemask); if (!*s) return nfserr_bad_stateid; return nfs_ok; @@ -3754,12 +4425,11 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) return check_special_stateids(net, current_fh, stateid, flags); - nfs4_lock_state(); - - status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, - &s, cstate->minorversion, nn); + status = nfsd4_lookup_stateid(cstate, stateid, + NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, + &s, nn); if (status) - goto out; + return status; status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate)); if (status) goto out; @@ -3770,12 +4440,13 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, if (status) goto out; if (filpp) { - file = dp->dl_file->fi_deleg_file; + file = dp->dl_stid.sc_file->fi_deleg_file; if (!file) { WARN_ON_ONCE(1); status = nfserr_serverfault; goto out; } + get_file(file); } break; case NFS4_OPEN_STID: @@ -3791,10 +4462,12 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, if (status) goto out; if (filpp) { + struct nfs4_file *fp = stp->st_stid.sc_file; + if (flags & RD_STATE) - file = find_readable_file(stp->st_file); + file = find_readable_file(fp); else - file = find_writeable_file(stp->st_file); + file = find_writeable_file(fp); } break; default: @@ -3803,28 +4476,12 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, } status = nfs_ok; if (file) - *filpp = get_file(file); + *filpp = file; out: - nfs4_unlock_state(); + nfs4_put_stid(s); return status; } -static __be32 -nfsd4_free_lock_stateid(struct nfs4_ol_stateid *stp) -{ - struct nfs4_lockowner *lo = 
lockowner(stp->st_stateowner); - - if (check_for_locks(stp->st_file, lo)) - return nfserr_locks_held; - /* - * Currently there's a 1-1 lock stateid<->lockowner - * correspondance, and we have to delete the lockowner when we - * delete the lock stateid: - */ - release_lockowner(lo); - return nfs_ok; -} - /* * Test if the stateid is valid */ @@ -3835,11 +4492,9 @@ nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_test_stateid_id *stateid; struct nfs4_client *cl = cstate->session->se_client; - nfs4_lock_state(); list_for_each_entry(stateid, &test_stateid->ts_stateid_list, ts_id_list) stateid->ts_id_status = nfsd4_validate_stateid(cl, &stateid->ts_id_stateid); - nfs4_unlock_state(); return nfs_ok; } @@ -3851,37 +4506,50 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stateid_t *stateid = &free_stateid->fr_stateid; struct nfs4_stid *s; struct nfs4_delegation *dp; + struct nfs4_ol_stateid *stp; struct nfs4_client *cl = cstate->session->se_client; __be32 ret = nfserr_bad_stateid; - nfs4_lock_state(); - s = find_stateid(cl, stateid); + spin_lock(&cl->cl_lock); + s = find_stateid_locked(cl, stateid); if (!s) - goto out; + goto out_unlock; switch (s->sc_type) { case NFS4_DELEG_STID: ret = nfserr_locks_held; - goto out; + break; case NFS4_OPEN_STID: - case NFS4_LOCK_STID: ret = check_stateid_generation(stateid, &s->sc_stateid, 1); if (ret) - goto out; - if (s->sc_type == NFS4_LOCK_STID) - ret = nfsd4_free_lock_stateid(openlockstateid(s)); - else - ret = nfserr_locks_held; + break; + ret = nfserr_locks_held; break; + case NFS4_LOCK_STID: + ret = check_stateid_generation(stateid, &s->sc_stateid, 1); + if (ret) + break; + stp = openlockstateid(s); + ret = nfserr_locks_held; + if (check_for_locks(stp->st_stid.sc_file, + lockowner(stp->st_stateowner))) + break; + unhash_lock_stateid(stp); + spin_unlock(&cl->cl_lock); + nfs4_put_stid(s); + ret = nfs_ok; + goto out; case NFS4_REVOKED_DELEG_STID: dp = 
delegstateid(s); - destroy_revoked_delegation(dp); + list_del_init(&dp->dl_recall_lru); + spin_unlock(&cl->cl_lock); + nfs4_put_stid(s); ret = nfs_ok; - break; - default: - ret = nfserr_bad_stateid; + goto out; + /* Default falls through and returns nfserr_bad_stateid */ } +out_unlock: + spin_unlock(&cl->cl_lock); out: - nfs4_unlock_state(); return ret; } @@ -3926,20 +4594,24 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, { __be32 status; struct nfs4_stid *s; + struct nfs4_ol_stateid *stp = NULL; dprintk("NFSD: %s: seqid=%d stateid = " STATEID_FMT "\n", __func__, seqid, STATEID_VAL(stateid)); *stpp = NULL; - status = nfsd4_lookup_stateid(stateid, typemask, &s, - cstate->minorversion, nn); + status = nfsd4_lookup_stateid(cstate, stateid, typemask, &s, nn); if (status) return status; - *stpp = openlockstateid(s); - if (!nfsd4_has_session(cstate)) - cstate->replay_owner = (*stpp)->st_stateowner; + stp = openlockstateid(s); + nfsd4_cstate_assign_replay(cstate, stp->st_stateowner); - return nfs4_seqid_op_checks(cstate, stateid, seqid, *stpp); + status = nfs4_seqid_op_checks(cstate, stateid, seqid, stp); + if (!status) + *stpp = stp; + else + nfs4_put_stid(&stp->st_stid); + return status; } static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, @@ -3947,14 +4619,18 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs { __be32 status; struct nfs4_openowner *oo; + struct nfs4_ol_stateid *stp; status = nfs4_preprocess_seqid_op(cstate, seqid, stateid, - NFS4_OPEN_STID, stpp, nn); + NFS4_OPEN_STID, &stp, nn); if (status) return status; - oo = openowner((*stpp)->st_stateowner); - if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) + oo = openowner(stp->st_stateowner); + if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { + nfs4_put_stid(&stp->st_stid); return nfserr_bad_stateid; + } + *stpp = stp; return nfs_ok; } @@ -3974,8 +4650,6 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct 
nfsd4_compound_state *cstate, if (status) return status; - nfs4_lock_state(); - status = nfs4_preprocess_seqid_op(cstate, oc->oc_seqid, &oc->oc_req_stateid, NFS4_OPEN_STID, &stp, nn); @@ -3984,7 +4658,7 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, oo = openowner(stp->st_stateowner); status = nfserr_bad_stateid; if (oo->oo_flags & NFS4_OO_CONFIRMED) - goto out; + goto put_stateid; oo->oo_flags |= NFS4_OO_CONFIRMED; update_stateid(&stp->st_stid.sc_stateid); memcpy(&oc->oc_resp_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); @@ -3993,10 +4667,10 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfsd4_client_record_create(oo->oo_owner.so_client); status = nfs_ok; +put_stateid: + nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); - if (!cstate->replay_owner) - nfs4_unlock_state(); return status; } @@ -4004,7 +4678,7 @@ static inline void nfs4_stateid_downgrade_bit(struct nfs4_ol_stateid *stp, u32 a { if (!test_access(access, stp)) return; - nfs4_file_put_access(stp->st_file, nfs4_access_to_omode(access)); + nfs4_file_put_access(stp->st_stid.sc_file, access); clear_access(access, stp); } @@ -4026,16 +4700,6 @@ static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_ac } } -static void -reset_union_bmap_deny(unsigned long deny, struct nfs4_ol_stateid *stp) -{ - int i; - for (i = 0; i < 4; i++) { - if ((i & deny) != i) - clear_deny(i, stp); - } -} - __be32 nfsd4_open_downgrade(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, @@ -4053,21 +4717,20 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, dprintk("NFSD: %s: od_deleg_want=0x%x ignored\n", __func__, od->od_deleg_want); - nfs4_lock_state(); status = nfs4_preprocess_confirmed_seqid_op(cstate, od->od_seqid, &od->od_stateid, &stp, nn); if (status) goto out; status = nfserr_inval; if (!test_access(od->od_share_access, stp)) { - dprintk("NFSD: access not a subset current bitmap: 0x%lx, input 
access=%08x\n", + dprintk("NFSD: access not a subset of current bitmap: 0x%hhx, input access=%08x\n", stp->st_access_bmap, od->od_share_access); - goto out; + goto put_stateid; } if (!test_deny(od->od_share_deny, stp)) { - dprintk("NFSD:deny not a subset current bitmap: 0x%lx, input deny=%08x\n", + dprintk("NFSD: deny not a subset of current bitmap: 0x%hhx, input deny=%08x\n", stp->st_deny_bmap, od->od_share_deny); - goto out; + goto put_stateid; } nfs4_stateid_downgrade(stp, od->od_share_access); @@ -4076,17 +4739,31 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, update_stateid(&stp->st_stid.sc_stateid); memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); status = nfs_ok; +put_stateid: + nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); - if (!cstate->replay_owner) - nfs4_unlock_state(); return status; } static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) { - unhash_open_stateid(s); + struct nfs4_client *clp = s->st_stid.sc_client; + LIST_HEAD(reaplist); + s->st_stid.sc_type = NFS4_CLOSED_STID; + spin_lock(&clp->cl_lock); + unhash_open_stateid(s, &reaplist); + + if (clp->cl_minorversion) { + put_ol_stateid_locked(s, &reaplist); + spin_unlock(&clp->cl_lock); + free_ol_stateid_reaplist(&reaplist); + } else { + spin_unlock(&clp->cl_lock); + free_ol_stateid_reaplist(&reaplist); + move_to_close_lru(s, clp->net); + } } /* @@ -4097,7 +4774,6 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_close *close) { __be32 status; - struct nfs4_openowner *oo; struct nfs4_ol_stateid *stp; struct net *net = SVC_NET(rqstp); struct nfsd_net *nn = net_generic(net, nfsd_net_id); @@ -4105,7 +4781,6 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, dprintk("NFSD: nfsd4_close on file %pd\n", cstate->current_fh.fh_dentry); - nfs4_lock_state(); status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid, &close->cl_stateid, NFS4_OPEN_STID|NFS4_CLOSED_STID, @@ -4113,31 +4788,14 @@ 
nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfsd4_bump_seqid(cstate, status); if (status) goto out; - oo = openowner(stp->st_stateowner); update_stateid(&stp->st_stid.sc_stateid); memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); nfsd4_close_open_stateid(stp); - if (cstate->minorversion) - free_generic_stateid(stp); - else - oo->oo_last_closed_stid = stp; - - if (list_empty(&oo->oo_owner.so_stateids)) { - if (cstate->minorversion) - release_openowner(oo); - else { - /* - * In the 4.0 case we need to keep the owners around a - * little while to handle CLOSE replay. - */ - move_to_close_lru(oo, SVC_NET(rqstp)); - } - } + /* put reference from nfs4_preprocess_seqid_op */ + nfs4_put_stid(&stp->st_stid); out: - if (!cstate->replay_owner) - nfs4_unlock_state(); return status; } @@ -4154,28 +4812,24 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) return status; - nfs4_lock_state(); - status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID, &s, - cstate->minorversion, nn); + status = nfsd4_lookup_stateid(cstate, stateid, NFS4_DELEG_STID, &s, nn); if (status) goto out; dp = delegstateid(s); status = check_stateid_generation(stateid, &dp->dl_stid.sc_stateid, nfsd4_has_session(cstate)); if (status) - goto out; + goto put_stateid; destroy_delegation(dp); +put_stateid: + nfs4_put_stid(&dp->dl_stid); out: - nfs4_unlock_state(); - return status; } #define LOFF_OVERFLOW(start, len) ((u64)(len) > ~(u64)(start)) -#define LOCKOWNER_INO_HASH_MASK (LOCKOWNER_INO_HASH_SIZE - 1) - static inline u64 end_offset(u64 start, u64 len) { @@ -4196,13 +4850,6 @@ last_byte_offset(u64 start, u64 len) return end > start ? 
end - 1: NFS4_MAX_UINT64; } -static unsigned int lockowner_ino_hashval(struct inode *inode, u32 cl_id, struct xdr_netobj *ownername) -{ - return (file_hashval(inode) + cl_id - + opaque_hashval(ownername->data, ownername->len)) - & LOCKOWNER_INO_HASH_MASK; -} - /* * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that * we can't properly handle lock requests that go beyond the (2^63 - 1)-th @@ -4255,47 +4902,56 @@ nevermind: deny->ld_type = NFS4_WRITE_LT; } -static bool same_lockowner_ino(struct nfs4_lockowner *lo, struct inode *inode, clientid_t *clid, struct xdr_netobj *owner) +static struct nfs4_lockowner * +find_lockowner_str_locked(clientid_t *clid, struct xdr_netobj *owner, + struct nfs4_client *clp) { - struct nfs4_ol_stateid *lst; + unsigned int strhashval = ownerstr_hashval(owner); + struct nfs4_stateowner *so; - if (!same_owner_str(&lo->lo_owner, owner, clid)) - return false; - if (list_empty(&lo->lo_owner.so_stateids)) { - WARN_ON_ONCE(1); - return false; + lockdep_assert_held(&clp->cl_lock); + + list_for_each_entry(so, &clp->cl_ownerstr_hashtbl[strhashval], + so_strhash) { + if (so->so_is_open_owner) + continue; + if (!same_owner_str(so, owner)) + continue; + atomic_inc(&so->so_count); + return lockowner(so); } - lst = list_first_entry(&lo->lo_owner.so_stateids, - struct nfs4_ol_stateid, st_perstateowner); - return lst->st_file->fi_inode == inode; + return NULL; } static struct nfs4_lockowner * -find_lockowner_str(struct inode *inode, clientid_t *clid, - struct xdr_netobj *owner, struct nfsd_net *nn) +find_lockowner_str(clientid_t *clid, struct xdr_netobj *owner, + struct nfs4_client *clp) { - unsigned int hashval = lockowner_ino_hashval(inode, clid->cl_id, owner); struct nfs4_lockowner *lo; - list_for_each_entry(lo, &nn->lockowner_ino_hashtbl[hashval], lo_owner_ino_hash) { - if (same_lockowner_ino(lo, inode, clid, owner)) - return lo; - } - return NULL; + spin_lock(&clp->cl_lock); + lo = find_lockowner_str_locked(clid, owner, clp); 
+ spin_unlock(&clp->cl_lock); + return lo; } -static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp) +static void nfs4_unhash_lockowner(struct nfs4_stateowner *sop) { - struct inode *inode = open_stp->st_file->fi_inode; - unsigned int inohash = lockowner_ino_hashval(inode, - clp->cl_clientid.cl_id, &lo->lo_owner.so_owner); - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + unhash_lockowner_locked(lockowner(sop)); +} + +static void nfs4_free_lockowner(struct nfs4_stateowner *sop) +{ + struct nfs4_lockowner *lo = lockowner(sop); - list_add(&lo->lo_owner.so_strhash, &nn->ownerstr_hashtbl[strhashval]); - list_add(&lo->lo_owner_ino_hash, &nn->lockowner_ino_hashtbl[inohash]); - list_add(&lo->lo_perstateid, &open_stp->st_lockowners); + kmem_cache_free(lockowner_slab, lo); } +static const struct nfs4_stateowner_operations lockowner_ops = { + .so_unhash = nfs4_unhash_lockowner, + .so_free = nfs4_free_lockowner, +}; + /* * Alloc a lock owner structure. 
* Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has @@ -4303,42 +4959,107 @@ static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, s * * strhashval = ownerstr_hashval */ - static struct nfs4_lockowner * -alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp, struct nfsd4_lock *lock) { - struct nfs4_lockowner *lo; +alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, + struct nfs4_ol_stateid *open_stp, + struct nfsd4_lock *lock) +{ + struct nfs4_lockowner *lo, *ret; lo = alloc_stateowner(lockowner_slab, &lock->lk_new_owner, clp); if (!lo) return NULL; INIT_LIST_HEAD(&lo->lo_owner.so_stateids); lo->lo_owner.so_is_open_owner = 0; - /* It is the openowner seqid that will be incremented in encode in the - * case of new lockowners; so increment the lock seqid manually: */ - lo->lo_owner.so_seqid = lock->lk_new_lock_seqid + 1; - hash_lockowner(lo, strhashval, clp, open_stp); + lo->lo_owner.so_seqid = lock->lk_new_lock_seqid; + lo->lo_owner.so_ops = &lockowner_ops; + spin_lock(&clp->cl_lock); + ret = find_lockowner_str_locked(&clp->cl_clientid, + &lock->lk_new_owner, clp); + if (ret == NULL) { + list_add(&lo->lo_owner.so_strhash, + &clp->cl_ownerstr_hashtbl[strhashval]); + ret = lo; + } else + nfs4_free_lockowner(&lo->lo_owner); + spin_unlock(&clp->cl_lock); return lo; } -static struct nfs4_ol_stateid * -alloc_init_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp, struct nfs4_ol_stateid *open_stp) +static void +init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, + struct nfs4_file *fp, struct inode *inode, + struct nfs4_ol_stateid *open_stp) { - struct nfs4_ol_stateid *stp; struct nfs4_client *clp = lo->lo_owner.so_client; - stp = nfs4_alloc_stateid(clp); - if (stp == NULL) - return NULL; + lockdep_assert_held(&clp->cl_lock); + + atomic_inc(&stp->st_stid.sc_count); stp->st_stid.sc_type = NFS4_LOCK_STID; - 
list_add(&stp->st_perfile, &fp->fi_stateids); - list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); stp->st_stateowner = &lo->lo_owner; + atomic_inc(&lo->lo_owner.so_count); get_nfs4_file(fp); - stp->st_file = fp; + stp->st_stid.sc_file = fp; + stp->st_stid.sc_free = nfs4_free_lock_stateid; stp->st_access_bmap = 0; stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; - return stp; + list_add(&stp->st_locks, &open_stp->st_locks); + list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); + spin_lock(&fp->fi_lock); + list_add(&stp->st_perfile, &fp->fi_stateids); + spin_unlock(&fp->fi_lock); +} + +static struct nfs4_ol_stateid * +find_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp) +{ + struct nfs4_ol_stateid *lst; + struct nfs4_client *clp = lo->lo_owner.so_client; + + lockdep_assert_held(&clp->cl_lock); + + list_for_each_entry(lst, &lo->lo_owner.so_stateids, st_perstateowner) { + if (lst->st_stid.sc_file == fp) { + atomic_inc(&lst->st_stid.sc_count); + return lst; + } + } + return NULL; +} + +static struct nfs4_ol_stateid * +find_or_create_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fi, + struct inode *inode, struct nfs4_ol_stateid *ost, + bool *new) +{ + struct nfs4_stid *ns = NULL; + struct nfs4_ol_stateid *lst; + struct nfs4_openowner *oo = openowner(ost->st_stateowner); + struct nfs4_client *clp = oo->oo_owner.so_client; + + spin_lock(&clp->cl_lock); + lst = find_lock_stateid(lo, fi); + if (lst == NULL) { + spin_unlock(&clp->cl_lock); + ns = nfs4_alloc_stid(clp, stateid_slab); + if (ns == NULL) + return NULL; + + spin_lock(&clp->cl_lock); + lst = find_lock_stateid(lo, fi); + if (likely(!lst)) { + lst = openlockstateid(ns); + init_lock_stateid(lst, lo, fi, inode, ost); + ns = NULL; + *new = true; + } + } + spin_unlock(&clp->cl_lock); + if (ns) + nfs4_put_stid(ns); + return lst; } static int @@ -4350,46 +5071,53 @@ check_lock_length(u64 offset, u64 length) static void get_lock_access(struct 
nfs4_ol_stateid *lock_stp, u32 access) { - struct nfs4_file *fp = lock_stp->st_file; - int oflag = nfs4_access_to_omode(access); + struct nfs4_file *fp = lock_stp->st_stid.sc_file; + + lockdep_assert_held(&fp->fi_lock); if (test_access(access, lock_stp)) return; - nfs4_file_get_access(fp, oflag); + __nfs4_file_get_access(fp, access); set_access(access, lock_stp); } -static __be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *ost, struct nfsd4_lock *lock, struct nfs4_ol_stateid **lst, bool *new) +static __be32 +lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, + struct nfs4_ol_stateid *ost, + struct nfsd4_lock *lock, + struct nfs4_ol_stateid **lst, bool *new) { - struct nfs4_file *fi = ost->st_file; + __be32 status; + struct nfs4_file *fi = ost->st_stid.sc_file; struct nfs4_openowner *oo = openowner(ost->st_stateowner); struct nfs4_client *cl = oo->oo_owner.so_client; + struct inode *inode = cstate->current_fh.fh_dentry->d_inode; struct nfs4_lockowner *lo; unsigned int strhashval; - struct nfsd_net *nn = net_generic(cl->net, nfsd_net_id); - - lo = find_lockowner_str(fi->fi_inode, &cl->cl_clientid, - &lock->v.new.owner, nn); - if (lo) { - if (!cstate->minorversion) - return nfserr_bad_seqid; - /* XXX: a lockowner always has exactly one stateid: */ - *lst = list_first_entry(&lo->lo_owner.so_stateids, - struct nfs4_ol_stateid, st_perstateowner); - return nfs_ok; + + lo = find_lockowner_str(&cl->cl_clientid, &lock->v.new.owner, cl); + if (!lo) { + strhashval = ownerstr_hashval(&lock->v.new.owner); + lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock); + if (lo == NULL) + return nfserr_jukebox; + } else { + /* with an existing lockowner, seqids must be the same */ + status = nfserr_bad_seqid; + if (!cstate->minorversion && + lock->lk_new_lock_seqid != lo->lo_owner.so_seqid) + goto out; } - strhashval = ownerstr_hashval(cl->cl_clientid.cl_id, - &lock->v.new.owner); - lo = 
alloc_init_lock_stateowner(strhashval, cl, ost, lock); - if (lo == NULL) - return nfserr_jukebox; - *lst = alloc_init_lock_stateid(lo, fi, ost); + + *lst = find_or_create_lock_stateid(lo, fi, inode, ost, new); if (*lst == NULL) { - release_lockowner(lo); - return nfserr_jukebox; + status = nfserr_jukebox; + goto out; } - *new = true; - return nfs_ok; + status = nfs_ok; +out: + nfs4_put_stateowner(&lo->lo_owner); + return status; } /* @@ -4401,14 +5129,16 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { struct nfs4_openowner *open_sop = NULL; struct nfs4_lockowner *lock_sop = NULL; - struct nfs4_ol_stateid *lock_stp; + struct nfs4_ol_stateid *lock_stp = NULL; + struct nfs4_ol_stateid *open_stp = NULL; + struct nfs4_file *fp; struct file *filp = NULL; struct file_lock *file_lock = NULL; struct file_lock *conflock = NULL; __be32 status = 0; - bool new_state = false; int lkflg; int err; + bool new = false; struct net *net = SVC_NET(rqstp); struct nfsd_net *nn = net_generic(net, nfsd_net_id); @@ -4425,11 +5155,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return status; } - nfs4_lock_state(); - if (lock->lk_is_new) { - struct nfs4_ol_stateid *open_stp = NULL; - if (nfsd4_has_session(cstate)) /* See rfc 5661 18.10.3: given clientid is ignored: */ memcpy(&lock->v.new.clientid, @@ -4453,12 +5179,13 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &lock->v.new.clientid)) goto out; status = lookup_or_create_lock_state(cstate, open_stp, lock, - &lock_stp, &new_state); - } else + &lock_stp, &new); + } else { status = nfs4_preprocess_seqid_op(cstate, lock->lk_old_lock_seqid, &lock->lk_old_lock_stateid, NFS4_LOCK_STID, &lock_stp, nn); + } if (status) goto out; lock_sop = lockowner(lock_stp->st_stateowner); @@ -4482,20 +5209,25 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; } + fp = lock_stp->st_stid.sc_file; locks_init_lock(file_lock); switch (lock->lk_type) 
{ case NFS4_READ_LT: case NFS4_READW_LT: - filp = find_readable_file(lock_stp->st_file); + spin_lock(&fp->fi_lock); + filp = find_readable_file_locked(fp); if (filp) get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ); + spin_unlock(&fp->fi_lock); file_lock->fl_type = F_RDLCK; break; case NFS4_WRITE_LT: case NFS4_WRITEW_LT: - filp = find_writeable_file(lock_stp->st_file); + spin_lock(&fp->fi_lock); + filp = find_writeable_file_locked(fp); if (filp) get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE); + spin_unlock(&fp->fi_lock); file_lock->fl_type = F_WRLCK; break; default: @@ -4544,11 +5276,27 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, break; } out: - if (status && new_state) - release_lockowner(lock_sop); + if (filp) + fput(filp); + if (lock_stp) { + /* Bump seqid manually if the 4.0 replay owner is openowner */ + if (cstate->replay_owner && + cstate->replay_owner != &lock_sop->lo_owner && + seqid_mutating_err(ntohl(status))) + lock_sop->lo_owner.so_seqid++; + + /* + * If this is a new, never-before-used stateid, and we are + * returning an error, then just go ahead and release it. 
+ */ + if (status && new) + release_lock_stateid(lock_stp); + + nfs4_put_stid(&lock_stp->st_stid); + } + if (open_stp) + nfs4_put_stid(&open_stp->st_stid); nfsd4_bump_seqid(cstate, status); - if (!cstate->replay_owner) - nfs4_unlock_state(); if (file_lock) locks_free_lock(file_lock); if (conflock) @@ -4580,9 +5328,8 @@ __be32 nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_lockt *lockt) { - struct inode *inode; struct file_lock *file_lock = NULL; - struct nfs4_lockowner *lo; + struct nfs4_lockowner *lo = NULL; __be32 status; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); @@ -4592,10 +5339,8 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (check_lock_length(lockt->lt_offset, lockt->lt_length)) return nfserr_inval; - nfs4_lock_state(); - if (!nfsd4_has_session(cstate)) { - status = lookup_clientid(&lockt->lt_clientid, false, nn, NULL); + status = lookup_clientid(&lockt->lt_clientid, cstate, nn); if (status) goto out; } @@ -4603,7 +5348,6 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) goto out; - inode = cstate->current_fh.fh_dentry->d_inode; file_lock = locks_alloc_lock(); if (!file_lock) { dprintk("NFSD: %s: unable to allocate lock!\n", __func__); @@ -4626,7 +5370,8 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; } - lo = find_lockowner_str(inode, &lockt->lt_clientid, &lockt->lt_owner, nn); + lo = find_lockowner_str(&lockt->lt_clientid, &lockt->lt_owner, + cstate->clp); if (lo) file_lock->fl_owner = (fl_owner_t)lo; file_lock->fl_pid = current->tgid; @@ -4646,7 +5391,8 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfs4_set_lock_denied(file_lock, &lockt->lt_denied); } out: - nfs4_unlock_state(); + if (lo) + nfs4_put_stateowner(&lo->lo_owner); if (file_lock) locks_free_lock(file_lock); return status; @@ -4670,23 +5416,21 @@ 
nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (check_lock_length(locku->lu_offset, locku->lu_length)) return nfserr_inval; - nfs4_lock_state(); - status = nfs4_preprocess_seqid_op(cstate, locku->lu_seqid, &locku->lu_stateid, NFS4_LOCK_STID, &stp, nn); if (status) goto out; - filp = find_any_file(stp->st_file); + filp = find_any_file(stp->st_stid.sc_file); if (!filp) { status = nfserr_lock_range; - goto out; + goto put_stateid; } file_lock = locks_alloc_lock(); if (!file_lock) { dprintk("NFSD: %s: unable to allocate lock!\n", __func__); status = nfserr_jukebox; - goto out; + goto fput; } locks_init_lock(file_lock); file_lock->fl_type = F_UNLCK; @@ -4708,41 +5452,51 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } update_stateid(&stp->st_stid.sc_stateid); memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); - +fput: + fput(filp); +put_stateid: + nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); - if (!cstate->replay_owner) - nfs4_unlock_state(); if (file_lock) locks_free_lock(file_lock); return status; out_nfserr: status = nfserrno(err); - goto out; + goto fput; } /* * returns - * 1: locks held by lockowner - * 0: no locks held by lockowner + * true: locks held by lockowner + * false: no locks held by lockowner */ -static int -check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner) +static bool +check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) { struct file_lock **flpp; - struct inode *inode = filp->fi_inode; - int status = 0; + int status = false; + struct file *filp = find_any_file(fp); + struct inode *inode; + + if (!filp) { + /* Any valid lock stateid should have some sort of access */ + WARN_ON_ONCE(1); + return status; + } + + inode = file_inode(filp); spin_lock(&inode->i_lock); for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) { if ((*flpp)->fl_owner == (fl_owner_t)lowner) { - status = 1; - goto out; + status 
= true; + break; } } -out: spin_unlock(&inode->i_lock); + fput(filp); return status; } @@ -4753,53 +5507,46 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, { clientid_t *clid = &rlockowner->rl_clientid; struct nfs4_stateowner *sop; - struct nfs4_lockowner *lo; + struct nfs4_lockowner *lo = NULL; struct nfs4_ol_stateid *stp; struct xdr_netobj *owner = &rlockowner->rl_owner; - struct list_head matches; - unsigned int hashval = ownerstr_hashval(clid->cl_id, owner); + unsigned int hashval = ownerstr_hashval(owner); __be32 status; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + struct nfs4_client *clp; dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", clid->cl_boot, clid->cl_id); - nfs4_lock_state(); - - status = lookup_clientid(clid, cstate->minorversion, nn, NULL); + status = lookup_clientid(clid, cstate, nn); if (status) - goto out; + return status; - status = nfserr_locks_held; - INIT_LIST_HEAD(&matches); + clp = cstate->clp; + /* Find the matching lock stateowner */ + spin_lock(&clp->cl_lock); + list_for_each_entry(sop, &clp->cl_ownerstr_hashtbl[hashval], + so_strhash) { - list_for_each_entry(sop, &nn->ownerstr_hashtbl[hashval], so_strhash) { - if (sop->so_is_open_owner) + if (sop->so_is_open_owner || !same_owner_str(sop, owner)) continue; - if (!same_owner_str(sop, owner, clid)) - continue; - list_for_each_entry(stp, &sop->so_stateids, - st_perstateowner) { - lo = lockowner(sop); - if (check_for_locks(stp->st_file, lo)) - goto out; - list_add(&lo->lo_list, &matches); + + /* see if there are still any locks associated with it */ + lo = lockowner(sop); + list_for_each_entry(stp, &sop->so_stateids, st_perstateowner) { + if (check_for_locks(stp->st_stid.sc_file, lo)) { + status = nfserr_locks_held; + spin_unlock(&clp->cl_lock); + return status; + } } + + atomic_inc(&sop->so_count); + break; } - /* Clients probably won't expect us to return with some (but not all) - * of the lockowner state released; so don't release any until all - * 
have been checked. */ - status = nfs_ok; - while (!list_empty(&matches)) { - lo = list_entry(matches.next, struct nfs4_lockowner, - lo_list); - /* unhash_stateowner deletes so_perclient only - * for openowners. */ - list_del(&lo->lo_list); + spin_unlock(&clp->cl_lock); + if (lo) release_lockowner(lo); - } -out: - nfs4_unlock_state(); return status; } @@ -4887,34 +5634,123 @@ nfsd4_find_reclaim_client(const char *recdir, struct nfsd_net *nn) * Called from OPEN. Look for clientid in reclaim list. */ __be32 -nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn) +nfs4_check_open_reclaim(clientid_t *clid, + struct nfsd4_compound_state *cstate, + struct nfsd_net *nn) { - struct nfs4_client *clp; + __be32 status; /* find clientid in conf_id_hashtbl */ - clp = find_confirmed_client(clid, sessions, nn); - if (clp == NULL) + status = lookup_clientid(clid, cstate, nn); + if (status) return nfserr_reclaim_bad; - return nfsd4_client_record_check(clp) ? nfserr_reclaim_bad : nfs_ok; + if (nfsd4_client_record_check(cstate->clp)) + return nfserr_reclaim_bad; + + return nfs_ok; } #ifdef CONFIG_NFSD_FAULT_INJECTION +static inline void +put_client(struct nfs4_client *clp) +{ + atomic_dec(&clp->cl_refcount); +} -u64 nfsd_forget_client(struct nfs4_client *clp, u64 max) +static struct nfs4_client * +nfsd_find_client(struct sockaddr_storage *addr, size_t addr_size) { - if (mark_client_expired(clp)) - return 0; - expire_client(clp); - return 1; + struct nfs4_client *clp; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + + if (!nfsd_netns_ready(nn)) + return NULL; + + list_for_each_entry(clp, &nn->client_lru, cl_lru) { + if (memcmp(&clp->cl_addr, addr, addr_size) == 0) + return clp; + } + return NULL; } -u64 nfsd_print_client(struct nfs4_client *clp, u64 num) +u64 +nfsd_inject_print_clients(void) { + struct nfs4_client *clp; + u64 count = 0; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); char 
buf[INET6_ADDRSTRLEN]; - rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf)); - printk(KERN_INFO "NFS Client: %s\n", buf); - return 1; + + if (!nfsd_netns_ready(nn)) + return 0; + + spin_lock(&nn->client_lock); + list_for_each_entry(clp, &nn->client_lru, cl_lru) { + rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf)); + pr_info("NFS Client: %s\n", buf); + ++count; + } + spin_unlock(&nn->client_lock); + + return count; +} + +u64 +nfsd_inject_forget_client(struct sockaddr_storage *addr, size_t addr_size) +{ + u64 count = 0; + struct nfs4_client *clp; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + + if (!nfsd_netns_ready(nn)) + return count; + + spin_lock(&nn->client_lock); + clp = nfsd_find_client(addr, addr_size); + if (clp) { + if (mark_client_expired_locked(clp) == nfs_ok) + ++count; + else + clp = NULL; + } + spin_unlock(&nn->client_lock); + + if (clp) + expire_client(clp); + + return count; +} + +u64 +nfsd_inject_forget_clients(u64 max) +{ + u64 count = 0; + struct nfs4_client *clp, *next; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + LIST_HEAD(reaplist); + + if (!nfsd_netns_ready(nn)) + return count; + + spin_lock(&nn->client_lock); + list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) { + if (mark_client_expired_locked(clp) == nfs_ok) { + list_add(&clp->cl_lru, &reaplist); + if (max != 0 && ++count >= max) + break; + } + } + spin_unlock(&nn->client_lock); + + list_for_each_entry_safe(clp, next, &reaplist, cl_lru) + expire_client(clp); + + return count; } static void nfsd_print_count(struct nfs4_client *clp, unsigned int count, @@ -4925,158 +5761,484 @@ static void nfsd_print_count(struct nfs4_client *clp, unsigned int count, printk(KERN_INFO "NFS Client: %s has %u %s\n", buf, count, type); } -static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max, void (*func)(struct nfs4_lockowner *)) +static void +nfsd_inject_add_lock_to_list(struct nfs4_ol_stateid 
*lst, + struct list_head *collect) +{ + struct nfs4_client *clp = lst->st_stid.sc_client; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + + if (!collect) + return; + + lockdep_assert_held(&nn->client_lock); + atomic_inc(&clp->cl_refcount); + list_add(&lst->st_locks, collect); +} + +static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max, + struct list_head *collect, + void (*func)(struct nfs4_ol_stateid *)) { struct nfs4_openowner *oop; - struct nfs4_lockowner *lop, *lo_next; struct nfs4_ol_stateid *stp, *st_next; + struct nfs4_ol_stateid *lst, *lst_next; u64 count = 0; + spin_lock(&clp->cl_lock); list_for_each_entry(oop, &clp->cl_openowners, oo_perclient) { - list_for_each_entry_safe(stp, st_next, &oop->oo_owner.so_stateids, st_perstateowner) { - list_for_each_entry_safe(lop, lo_next, &stp->st_lockowners, lo_perstateid) { - if (func) - func(lop); - if (++count == max) - return count; + list_for_each_entry_safe(stp, st_next, + &oop->oo_owner.so_stateids, st_perstateowner) { + list_for_each_entry_safe(lst, lst_next, + &stp->st_locks, st_locks) { + if (func) { + func(lst); + nfsd_inject_add_lock_to_list(lst, + collect); + } + ++count; + /* + * Despite the fact that these functions deal + * with 64-bit integers for "count", we must + * ensure that it doesn't blow up the + * clp->cl_refcount. Throw a warning if we + * start to approach INT_MAX here. 
+ */ + WARN_ON_ONCE(count == (INT_MAX / 2)); + if (count == max) + goto out; } } } +out: + spin_unlock(&clp->cl_lock); return count; } -u64 nfsd_forget_client_locks(struct nfs4_client *clp, u64 max) +static u64 +nfsd_collect_client_locks(struct nfs4_client *clp, struct list_head *collect, + u64 max) { - return nfsd_foreach_client_lock(clp, max, release_lockowner); + return nfsd_foreach_client_lock(clp, max, collect, unhash_lock_stateid); } -u64 nfsd_print_client_locks(struct nfs4_client *clp, u64 max) +static u64 +nfsd_print_client_locks(struct nfs4_client *clp) { - u64 count = nfsd_foreach_client_lock(clp, max, NULL); + u64 count = nfsd_foreach_client_lock(clp, 0, NULL, NULL); nfsd_print_count(clp, count, "locked files"); return count; } -static u64 nfsd_foreach_client_open(struct nfs4_client *clp, u64 max, void (*func)(struct nfs4_openowner *)) +u64 +nfsd_inject_print_locks(void) +{ + struct nfs4_client *clp; + u64 count = 0; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + + if (!nfsd_netns_ready(nn)) + return 0; + + spin_lock(&nn->client_lock); + list_for_each_entry(clp, &nn->client_lru, cl_lru) + count += nfsd_print_client_locks(clp); + spin_unlock(&nn->client_lock); + + return count; +} + +static void +nfsd_reap_locks(struct list_head *reaplist) +{ + struct nfs4_client *clp; + struct nfs4_ol_stateid *stp, *next; + + list_for_each_entry_safe(stp, next, reaplist, st_locks) { + list_del_init(&stp->st_locks); + clp = stp->st_stid.sc_client; + nfs4_put_stid(&stp->st_stid); + put_client(clp); + } +} + +u64 +nfsd_inject_forget_client_locks(struct sockaddr_storage *addr, size_t addr_size) +{ + unsigned int count = 0; + struct nfs4_client *clp; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + LIST_HEAD(reaplist); + + if (!nfsd_netns_ready(nn)) + return count; + + spin_lock(&nn->client_lock); + clp = nfsd_find_client(addr, addr_size); + if (clp) + count = nfsd_collect_client_locks(clp, &reaplist, 0); + 
spin_unlock(&nn->client_lock); + nfsd_reap_locks(&reaplist); + return count; +} + +u64 +nfsd_inject_forget_locks(u64 max) +{ + u64 count = 0; + struct nfs4_client *clp; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + LIST_HEAD(reaplist); + + if (!nfsd_netns_ready(nn)) + return count; + + spin_lock(&nn->client_lock); + list_for_each_entry(clp, &nn->client_lru, cl_lru) { + count += nfsd_collect_client_locks(clp, &reaplist, max - count); + if (max != 0 && count >= max) + break; + } + spin_unlock(&nn->client_lock); + nfsd_reap_locks(&reaplist); + return count; +} + +static u64 +nfsd_foreach_client_openowner(struct nfs4_client *clp, u64 max, + struct list_head *collect, + void (*func)(struct nfs4_openowner *)) { struct nfs4_openowner *oop, *next; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); u64 count = 0; + lockdep_assert_held(&nn->client_lock); + + spin_lock(&clp->cl_lock); list_for_each_entry_safe(oop, next, &clp->cl_openowners, oo_perclient) { - if (func) + if (func) { func(oop); - if (++count == max) + if (collect) { + atomic_inc(&clp->cl_refcount); + list_add(&oop->oo_perclient, collect); + } + } + ++count; + /* + * Despite the fact that these functions deal with + * 64-bit integers for "count", we must ensure that + * it doesn't blow up the clp->cl_refcount. Throw a + * warning if we start to approach INT_MAX here. 
+ */ + WARN_ON_ONCE(count == (INT_MAX / 2)); + if (count == max) break; } + spin_unlock(&clp->cl_lock); return count; } -u64 nfsd_forget_client_openowners(struct nfs4_client *clp, u64 max) +static u64 +nfsd_print_client_openowners(struct nfs4_client *clp) { - return nfsd_foreach_client_open(clp, max, release_openowner); + u64 count = nfsd_foreach_client_openowner(clp, 0, NULL, NULL); + + nfsd_print_count(clp, count, "openowners"); + return count; } -u64 nfsd_print_client_openowners(struct nfs4_client *clp, u64 max) +static u64 +nfsd_collect_client_openowners(struct nfs4_client *clp, + struct list_head *collect, u64 max) { - u64 count = nfsd_foreach_client_open(clp, max, NULL); - nfsd_print_count(clp, count, "open files"); - return count; + return nfsd_foreach_client_openowner(clp, max, collect, + unhash_openowner_locked); } -static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max, - struct list_head *victims) +u64 +nfsd_inject_print_openowners(void) { - struct nfs4_delegation *dp, *next; + struct nfs4_client *clp; u64 count = 0; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + + if (!nfsd_netns_ready(nn)) + return 0; + + spin_lock(&nn->client_lock); + list_for_each_entry(clp, &nn->client_lru, cl_lru) + count += nfsd_print_client_openowners(clp); + spin_unlock(&nn->client_lock); - lockdep_assert_held(&state_lock); - list_for_each_entry_safe(dp, next, &clp->cl_delegations, dl_perclnt) { - if (victims) - list_move(&dp->dl_recall_lru, victims); - if (++count == max) - break; - } return count; } -u64 nfsd_forget_client_delegations(struct nfs4_client *clp, u64 max) +static void +nfsd_reap_openowners(struct list_head *reaplist) { - struct nfs4_delegation *dp, *next; - LIST_HEAD(victims); - u64 count; + struct nfs4_client *clp; + struct nfs4_openowner *oop, *next; - spin_lock(&state_lock); - count = nfsd_find_all_delegations(clp, max, &victims); - spin_unlock(&state_lock); + list_for_each_entry_safe(oop, next, reaplist, 
oo_perclient) { + list_del_init(&oop->oo_perclient); + clp = oop->oo_owner.so_client; + release_openowner(oop); + put_client(clp); + } +} - list_for_each_entry_safe(dp, next, &victims, dl_recall_lru) - revoke_delegation(dp); +u64 +nfsd_inject_forget_client_openowners(struct sockaddr_storage *addr, + size_t addr_size) +{ + unsigned int count = 0; + struct nfs4_client *clp; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + LIST_HEAD(reaplist); + if (!nfsd_netns_ready(nn)) + return count; + + spin_lock(&nn->client_lock); + clp = nfsd_find_client(addr, addr_size); + if (clp) + count = nfsd_collect_client_openowners(clp, &reaplist, 0); + spin_unlock(&nn->client_lock); + nfsd_reap_openowners(&reaplist); return count; } -u64 nfsd_recall_client_delegations(struct nfs4_client *clp, u64 max) +u64 +nfsd_inject_forget_openowners(u64 max) { - struct nfs4_delegation *dp, *next; - LIST_HEAD(victims); - u64 count; + u64 count = 0; + struct nfs4_client *clp; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + LIST_HEAD(reaplist); - spin_lock(&state_lock); - count = nfsd_find_all_delegations(clp, max, &victims); - list_for_each_entry_safe(dp, next, &victims, dl_recall_lru) - nfsd_break_one_deleg(dp); - spin_unlock(&state_lock); + if (!nfsd_netns_ready(nn)) + return count; + spin_lock(&nn->client_lock); + list_for_each_entry(clp, &nn->client_lru, cl_lru) { + count += nfsd_collect_client_openowners(clp, &reaplist, + max - count); + if (max != 0 && count >= max) + break; + } + spin_unlock(&nn->client_lock); + nfsd_reap_openowners(&reaplist); return count; } -u64 nfsd_print_client_delegations(struct nfs4_client *clp, u64 max) +static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max, + struct list_head *victims) { + struct nfs4_delegation *dp, *next; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); u64 count = 0; + lockdep_assert_held(&nn->client_lock); + spin_lock(&state_lock); - count = 
nfsd_find_all_delegations(clp, max, NULL); + list_for_each_entry_safe(dp, next, &clp->cl_delegations, dl_perclnt) { + if (victims) { + /* + * It's not safe to mess with delegations that have a + * non-zero dl_time. They might have already been broken + * and could be processed by the laundromat outside of + * the state_lock. Just leave them be. + */ + if (dp->dl_time != 0) + continue; + + atomic_inc(&clp->cl_refcount); + unhash_delegation_locked(dp); + list_add(&dp->dl_recall_lru, victims); + } + ++count; + /* + * Despite the fact that these functions deal with + * 64-bit integers for "count", we must ensure that + * it doesn't blow up the clp->cl_refcount. Throw a + * warning if we start to approach INT_MAX here. + */ + WARN_ON_ONCE(count == (INT_MAX / 2)); + if (count == max) + break; + } spin_unlock(&state_lock); + return count; +} + +static u64 +nfsd_print_client_delegations(struct nfs4_client *clp) +{ + u64 count = nfsd_find_all_delegations(clp, 0, NULL); nfsd_print_count(clp, count, "delegations"); return count; } -u64 nfsd_for_n_state(u64 max, u64 (*func)(struct nfs4_client *, u64)) +u64 +nfsd_inject_print_delegations(void) { - struct nfs4_client *clp, *next; + struct nfs4_client *clp; u64 count = 0; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, nfsd_net_id); + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); if (!nfsd_netns_ready(nn)) return 0; - list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) { - count += func(clp, max - count); - if ((max != 0) && (count >= max)) - break; + spin_lock(&nn->client_lock); + list_for_each_entry(clp, &nn->client_lru, cl_lru) + count += nfsd_print_client_delegations(clp); + spin_unlock(&nn->client_lock); + + return count; +} + +static void +nfsd_forget_delegations(struct list_head *reaplist) +{ + struct nfs4_client *clp; + struct nfs4_delegation *dp, *next; + + list_for_each_entry_safe(dp, next, reaplist, dl_recall_lru) { + list_del_init(&dp->dl_recall_lru); + clp = 
dp->dl_stid.sc_client; + revoke_delegation(dp); + put_client(clp); } +} +u64 +nfsd_inject_forget_client_delegations(struct sockaddr_storage *addr, + size_t addr_size) +{ + u64 count = 0; + struct nfs4_client *clp; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + LIST_HEAD(reaplist); + + if (!nfsd_netns_ready(nn)) + return count; + + spin_lock(&nn->client_lock); + clp = nfsd_find_client(addr, addr_size); + if (clp) + count = nfsd_find_all_delegations(clp, 0, &reaplist); + spin_unlock(&nn->client_lock); + + nfsd_forget_delegations(&reaplist); return count; } -struct nfs4_client *nfsd_find_client(struct sockaddr_storage *addr, size_t addr_size) +u64 +nfsd_inject_forget_delegations(u64 max) { + u64 count = 0; struct nfs4_client *clp; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, nfsd_net_id); + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + LIST_HEAD(reaplist); if (!nfsd_netns_ready(nn)) - return NULL; + return count; + spin_lock(&nn->client_lock); list_for_each_entry(clp, &nn->client_lru, cl_lru) { - if (memcmp(&clp->cl_addr, addr, addr_size) == 0) - return clp; + count += nfsd_find_all_delegations(clp, max - count, &reaplist); + if (max != 0 && count >= max) + break; } - return NULL; + spin_unlock(&nn->client_lock); + nfsd_forget_delegations(&reaplist); + return count; } +static void +nfsd_recall_delegations(struct list_head *reaplist) +{ + struct nfs4_client *clp; + struct nfs4_delegation *dp, *next; + + list_for_each_entry_safe(dp, next, reaplist, dl_recall_lru) { + list_del_init(&dp->dl_recall_lru); + clp = dp->dl_stid.sc_client; + /* + * We skipped all entries that had a zero dl_time before, + * so we can now reset the dl_time back to 0. If a delegation + * break comes in now, then it won't make any difference since + * we're recalling it either way. 
+ */ + spin_lock(&state_lock); + dp->dl_time = 0; + spin_unlock(&state_lock); + nfsd_break_one_deleg(dp); + put_client(clp); + } +} + +u64 +nfsd_inject_recall_client_delegations(struct sockaddr_storage *addr, + size_t addr_size) +{ + u64 count = 0; + struct nfs4_client *clp; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + LIST_HEAD(reaplist); + + if (!nfsd_netns_ready(nn)) + return count; + + spin_lock(&nn->client_lock); + clp = nfsd_find_client(addr, addr_size); + if (clp) + count = nfsd_find_all_delegations(clp, 0, &reaplist); + spin_unlock(&nn->client_lock); + + nfsd_recall_delegations(&reaplist); + return count; +} + +u64 +nfsd_inject_recall_delegations(u64 max) +{ + u64 count = 0; + struct nfs4_client *clp, *next; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + LIST_HEAD(reaplist); + + if (!nfsd_netns_ready(nn)) + return count; + + spin_lock(&nn->client_lock); + list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) { + count += nfsd_find_all_delegations(clp, max - count, &reaplist); + if (max != 0 && ++count >= max) + break; + } + spin_unlock(&nn->client_lock); + nfsd_recall_delegations(&reaplist); + return count; +} #endif /* CONFIG_NFSD_FAULT_INJECTION */ /* @@ -5113,14 +6275,6 @@ static int nfs4_state_create_net(struct net *net) CLIENT_HASH_SIZE, GFP_KERNEL); if (!nn->unconf_id_hashtbl) goto err_unconf_id; - nn->ownerstr_hashtbl = kmalloc(sizeof(struct list_head) * - OWNER_HASH_SIZE, GFP_KERNEL); - if (!nn->ownerstr_hashtbl) - goto err_ownerstr; - nn->lockowner_ino_hashtbl = kmalloc(sizeof(struct list_head) * - LOCKOWNER_INO_HASH_SIZE, GFP_KERNEL); - if (!nn->lockowner_ino_hashtbl) - goto err_lockowner_ino; nn->sessionid_hashtbl = kmalloc(sizeof(struct list_head) * SESSION_HASH_SIZE, GFP_KERNEL); if (!nn->sessionid_hashtbl) @@ -5130,10 +6284,6 @@ static int nfs4_state_create_net(struct net *net) INIT_LIST_HEAD(&nn->conf_id_hashtbl[i]); INIT_LIST_HEAD(&nn->unconf_id_hashtbl[i]); } - for 
(i = 0; i < OWNER_HASH_SIZE; i++) - INIT_LIST_HEAD(&nn->ownerstr_hashtbl[i]); - for (i = 0; i < LOCKOWNER_INO_HASH_SIZE; i++) - INIT_LIST_HEAD(&nn->lockowner_ino_hashtbl[i]); for (i = 0; i < SESSION_HASH_SIZE; i++) INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]); nn->conf_name_tree = RB_ROOT; @@ -5149,10 +6299,6 @@ static int nfs4_state_create_net(struct net *net) return 0; err_sessionid: - kfree(nn->lockowner_ino_hashtbl); -err_lockowner_ino: - kfree(nn->ownerstr_hashtbl); -err_ownerstr: kfree(nn->unconf_id_hashtbl); err_unconf_id: kfree(nn->conf_id_hashtbl); @@ -5182,8 +6328,6 @@ nfs4_state_destroy_net(struct net *net) } kfree(nn->sessionid_hashtbl); - kfree(nn->lockowner_ino_hashtbl); - kfree(nn->ownerstr_hashtbl); kfree(nn->unconf_id_hashtbl); kfree(nn->conf_id_hashtbl); put_net(net); @@ -5247,22 +6391,22 @@ nfs4_state_shutdown_net(struct net *net) cancel_delayed_work_sync(&nn->laundromat_work); locks_end_grace(&nn->nfsd4_manager); - nfs4_lock_state(); INIT_LIST_HEAD(&reaplist); spin_lock(&state_lock); list_for_each_safe(pos, next, &nn->del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - list_move(&dp->dl_recall_lru, &reaplist); + unhash_delegation_locked(dp); + list_add(&dp->dl_recall_lru, &reaplist); } spin_unlock(&state_lock); list_for_each_safe(pos, next, &reaplist) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - destroy_delegation(dp); + list_del_init(&dp->dl_recall_lru); + nfs4_put_stid(&dp->dl_stid); } nfsd4_client_tracking_exit(net); nfs4_state_destroy_net(net); - nfs4_unlock_state(); } void diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 944275c8f56d..b01f6e100ee8 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -181,28 +181,43 @@ static int zero_clientid(clientid_t *clid) } /** - * defer_free - mark an allocation as deferred freed - * @argp: NFSv4 compound argument structure to be freed with - * @release: release callback to free @p, typically kfree() - * @p: pointer to be freed + * 
svcxdr_tmpalloc - allocate memory to be freed after compound processing + * @argp: NFSv4 compound argument structure + * @p: pointer to be freed (with kfree()) * * Marks @p to be freed when processing the compound operation * described in @argp finishes. */ -static int -defer_free(struct nfsd4_compoundargs *argp, - void (*release)(const void *), void *p) +static void * +svcxdr_tmpalloc(struct nfsd4_compoundargs *argp, u32 len) { - struct tmpbuf *tb; + struct svcxdr_tmpbuf *tb; - tb = kmalloc(sizeof(*tb), GFP_KERNEL); + tb = kmalloc(sizeof(*tb) + len, GFP_KERNEL); if (!tb) - return -ENOMEM; - tb->buf = p; - tb->release = release; + return NULL; tb->next = argp->to_free; argp->to_free = tb; - return 0; + return tb->buf; +} + +/* + * For xdr strings that need to be passed to other kernel api's + * as null-terminated strings. + * + * Note null-terminating in place usually isn't safe since the + * buffer might end on a page boundary. + */ +static char * +svcxdr_dupstr(struct nfsd4_compoundargs *argp, void *buf, u32 len) +{ + char *p = svcxdr_tmpalloc(argp, len + 1); + + if (!p) + return NULL; + memcpy(p, buf, len); + p[len] = '\0'; + return p; } /** @@ -217,19 +232,13 @@ defer_free(struct nfsd4_compoundargs *argp, */ static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes) { - if (p == argp->tmp) { - p = kmemdup(argp->tmp, nbytes, GFP_KERNEL); - if (!p) - return NULL; - } else { - BUG_ON(p != argp->tmpp); - argp->tmpp = NULL; - } - if (defer_free(argp, kfree, p)) { - kfree(p); + void *ret; + + ret = svcxdr_tmpalloc(argp, nbytes); + if (!ret) return NULL; - } else - return (char *)p; + memcpy(ret, p, nbytes); + return ret; } static __be32 @@ -292,12 +301,10 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, if (nace > NFS4_ACL_MAX) return nfserr_fbig; - *acl = nfs4_acl_new(nace); + *acl = svcxdr_tmpalloc(argp, nfs4_acl_bytes(nace)); if (*acl == NULL) return nfserr_jukebox; - defer_free(argp, kfree, *acl); - (*acl)->naces = nace; for 
(ace = (*acl)->aces; ace < (*acl)->aces + nace; ace++) { READ_BUF(16); len += 16; @@ -418,12 +425,10 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, return nfserr_badlabel; len += (XDR_QUADLEN(dummy32) << 2); READMEM(buf, dummy32); - label->data = kzalloc(dummy32 + 1, GFP_KERNEL); + label->len = dummy32; + label->data = svcxdr_dupstr(argp, buf, dummy32); if (!label->data) return nfserr_jukebox; - label->len = dummy32; - defer_free(argp, kfree, label->data); - memcpy(label->data, buf, dummy32); } #endif @@ -598,20 +603,11 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create switch (create->cr_type) { case NF4LNK: READ_BUF(4); - create->cr_linklen = be32_to_cpup(p++); - READ_BUF(create->cr_linklen); - /* - * The VFS will want a null-terminated string, and - * null-terminating in place isn't safe since this might - * end on a page boundary: - */ - create->cr_linkname = - kmalloc(create->cr_linklen + 1, GFP_KERNEL); - if (!create->cr_linkname) + create->cr_datalen = be32_to_cpup(p++); + READ_BUF(create->cr_datalen); + create->cr_data = svcxdr_dupstr(argp, p, create->cr_datalen); + if (!create->cr_data) return nfserr_jukebox; - memcpy(create->cr_linkname, p, create->cr_linklen); - create->cr_linkname[create->cr_linklen] = '\0'; - defer_free(argp, kfree, create->cr_linkname); break; case NF4BLK: case NF4CHR: @@ -1481,13 +1477,12 @@ nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_sta INIT_LIST_HEAD(&test_stateid->ts_stateid_list); for (i = 0; i < test_stateid->ts_num_ids; i++) { - stateid = kmalloc(sizeof(struct nfsd4_test_stateid_id), GFP_KERNEL); + stateid = svcxdr_tmpalloc(argp, sizeof(*stateid)); if (!stateid) { status = nfserrno(-ENOMEM); goto out; } - defer_free(argp, kfree, stateid); INIT_LIST_HEAD(&stateid->ts_id_list); list_add_tail(&stateid->ts_id_list, &test_stateid->ts_stateid_list); @@ -1640,7 +1635,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) goto xdr_error; if 
(argp->opcnt > ARRAY_SIZE(argp->iops)) { - argp->ops = kmalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL); + argp->ops = kzalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL); if (!argp->ops) { argp->ops = argp->iops; dprintk("nfsd: couldn't allocate room for COMPOUND\n"); @@ -2662,6 +2657,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, struct xdr_stream *xdr = cd->xdr; int start_offset = xdr->buf->len; int cookie_offset; + u32 name_and_cookie; int entry_bytes; __be32 nfserr = nfserr_toosmall; __be64 wire_offset; @@ -2723,7 +2719,14 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, cd->rd_maxcount -= entry_bytes; if (!cd->rd_dircount) goto fail; - cd->rd_dircount--; + /* + * RFC 3530 14.2.24 describes rd_dircount as only a "hint", so + * let's always let through the first entry, at least: + */ + name_and_cookie = 4 * XDR_QUADLEN(namlen) + 8; + if (name_and_cookie > cd->rd_dircount && cd->cookie_offset) + goto fail; + cd->rd_dircount -= min(cd->rd_dircount, name_and_cookie); cd->cookie_offset = cookie_offset; skip_entry: cd->common.err = nfs_ok; @@ -3077,11 +3080,8 @@ static __be32 nfsd4_encode_splice_read( __be32 nfserr; __be32 *p = xdr->p - 2; - /* - * Don't inline pages unless we know there's room for eof, - * count, and possible padding: - */ - if (xdr->end - xdr->p < 3) + /* Make sure there will be room for padding if needed */ + if (xdr->end - xdr->p < 1) return nfserr_resource; nfserr = nfsd_splice_read(read->rd_rqstp, file, @@ -3104,7 +3104,8 @@ static __be32 nfsd4_encode_splice_read( buf->page_len = maxcount; buf->len += maxcount; - xdr->page_ptr += (maxcount + PAGE_SIZE - 1) / PAGE_SIZE; + xdr->page_ptr += (buf->page_base + maxcount + PAGE_SIZE - 1) + / PAGE_SIZE; /* Use rest of head for padding and remaining ops: */ buf->tail[0].iov_base = xdr->p; @@ -3147,9 +3148,7 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, len = maxcount; v = 0; - thislen = (void *)xdr->end - (void *)xdr->p; - if (len < 
thislen) - thislen = len; + thislen = min_t(long, len, ((void *)xdr->end - (void *)xdr->p)); p = xdr_reserve_space(xdr, (thislen+3)&~3); WARN_ON_ONCE(!p); resp->rqstp->rq_vec[v].iov_base = p; @@ -3216,10 +3215,8 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, xdr_commit_encode(xdr); maxcount = svc_max_payload(resp->rqstp); - if (maxcount > xdr->buf->buflen - xdr->buf->len) - maxcount = xdr->buf->buflen - xdr->buf->len; - if (maxcount > read->rd_length) - maxcount = read->rd_length; + maxcount = min_t(unsigned long, maxcount, (xdr->buf->buflen - xdr->buf->len)); + maxcount = min_t(unsigned long, maxcount, read->rd_length); if (!read->rd_filp) { err = nfsd_get_tmp_read_open(resp->rqstp, read->rd_fhp, @@ -3333,6 +3330,10 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 } maxcount = min_t(int, maxcount-16, bytes_left); + /* RFC 3530 14.2.24 allows us to ignore dircount when it's 0: */ + if (!readdir->rd_dircount) + readdir->rd_dircount = INT_MAX; + readdir->xdr = xdr; readdir->rd_maxcount = maxcount; readdir->common.err = 0; @@ -3937,8 +3938,6 @@ status: * * XDR note: do not encode rp->rp_buflen: the buffer contains the * previously sent already encoded operation. - * - * called with nfs4_lock_state() held */ void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op) @@ -3977,9 +3976,8 @@ int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp) kfree(args->tmpp); args->tmpp = NULL; while (args->to_free) { - struct tmpbuf *tb = args->to_free; + struct svcxdr_tmpbuf *tb = args->to_free; args->to_free = tb->next; - tb->release(tb->buf); kfree(tb); } return 1; @@ -4012,7 +4010,6 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo /* * All that remains is to write the tag and operation count... 
*/ - struct nfsd4_compound_state *cs = &resp->cstate; struct xdr_buf *buf = resp->xdr.buf; WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len + @@ -4026,19 +4023,7 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo p += XDR_QUADLEN(resp->taglen); *p++ = htonl(resp->opcnt); - if (nfsd4_has_session(cs)) { - struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - struct nfs4_client *clp = cs->session->se_client; - if (cs->status != nfserr_replay_cache) { - nfsd4_store_cache_entry(resp); - cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE; - } - /* Renew the clientid on success and on replay */ - spin_lock(&nn->client_lock); - nfsd4_put_session(cs->session); - spin_unlock(&nn->client_lock); - put_client_renew(clp); - } + nfsd4_sequence_done(resp); return 1; } diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 6040da8830ff..ff9567633245 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -221,7 +221,12 @@ static void hash_refile(struct svc_cacherep *rp) { hlist_del_init(&rp->c_hash); - hlist_add_head(&rp->c_hash, cache_hash + hash_32(rp->c_xid, maskbits)); + /* + * No point in byte swapping c_xid since we're just using it to pick + * a hash bucket. + */ + hlist_add_head(&rp->c_hash, cache_hash + + hash_32((__force u32)rp->c_xid, maskbits)); } /* @@ -356,7 +361,11 @@ nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum) struct hlist_head *rh; unsigned int entries = 0; - rh = &cache_hash[hash_32(rqstp->rq_xid, maskbits)]; + /* + * No point in byte swapping rq_xid since we're just using it to pick + * a hash bucket. 
+ */ + rh = &cache_hash[hash_32((__force u32)rqstp->rq_xid, maskbits)]; hlist_for_each_entry(rp, rh, c_hash) { ++entries; if (nfsd_cache_match(rqstp, csum, rp)) { diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 51844048937f..4e042105fb6e 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -39,6 +39,7 @@ enum { NFSD_Versions, NFSD_Ports, NFSD_MaxBlkSize, + NFSD_MaxConnections, NFSD_SupportedEnctypes, /* * The below MUST come last. Otherwise we leave a hole in nfsd_files[] @@ -62,6 +63,7 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size); static ssize_t write_versions(struct file *file, char *buf, size_t size); static ssize_t write_ports(struct file *file, char *buf, size_t size); static ssize_t write_maxblksize(struct file *file, char *buf, size_t size); +static ssize_t write_maxconn(struct file *file, char *buf, size_t size); #ifdef CONFIG_NFSD_V4 static ssize_t write_leasetime(struct file *file, char *buf, size_t size); static ssize_t write_gracetime(struct file *file, char *buf, size_t size); @@ -77,6 +79,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = { [NFSD_Versions] = write_versions, [NFSD_Ports] = write_ports, [NFSD_MaxBlkSize] = write_maxblksize, + [NFSD_MaxConnections] = write_maxconn, #ifdef CONFIG_NFSD_V4 [NFSD_Leasetime] = write_leasetime, [NFSD_Gracetime] = write_gracetime, @@ -369,8 +372,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) if (maxsize < NFS_FHSIZE) return -EINVAL; - if (maxsize > NFS3_FHSIZE) - maxsize = NFS3_FHSIZE; + maxsize = min(maxsize, NFS3_FHSIZE); if (qword_get(&mesg, mesg, size)>0) return -EINVAL; @@ -871,10 +873,8 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) /* force bsize into allowed range and * required alignment. 
*/ - if (bsize < 1024) - bsize = 1024; - if (bsize > NFSSVC_MAXBLKSIZE) - bsize = NFSSVC_MAXBLKSIZE; + bsize = max_t(int, bsize, 1024); + bsize = min_t(int, bsize, NFSSVC_MAXBLKSIZE); bsize &= ~(1024-1); mutex_lock(&nfsd_mutex); if (nn->nfsd_serv) { @@ -889,6 +889,44 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) nfsd_max_blksize); } +/** + * write_maxconn - Set or report the current max number of connections + * + * Input: + * buf: ignored + * size: zero + * OR + * + * Input: + * buf: C string containing an unsigned + * integer value representing the new + * number of max connections + * size: non-zero length of C string in @buf + * Output: + * On success: passed-in buffer filled with '\n'-terminated C string + * containing numeric value of max_connections setting + * for this net namespace; + * return code is the size in bytes of the string + * On error: return code is zero or a negative errno value + */ +static ssize_t write_maxconn(struct file *file, char *buf, size_t size) +{ + char *mesg = buf; + struct net *net = file->f_dentry->d_sb->s_fs_info; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + unsigned int maxconn = nn->max_connections; + + if (size > 0) { + int rv = get_uint(&mesg, &maxconn); + + if (rv) + return rv; + nn->max_connections = maxconn; + } + + return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%u\n", maxconn); +} + #ifdef CONFIG_NFSD_V4 static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time, struct nfsd_net *nn) @@ -1064,6 +1102,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, + [NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO}, #if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE) 
[NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO}, #endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */ diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index ec8393418154..e883a5868be6 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -162,7 +162,14 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) /* deprecated, convert to type 3 */ len = key_len(FSID_ENCODE_DEV)/4; fh->fh_fsid_type = FSID_ENCODE_DEV; - fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl(fh->fh_fsid[0]), ntohl(fh->fh_fsid[1]))); + /* + * struct knfsd_fh uses host-endian fields, which are + * sometimes used to hold net-endian values. This + * confuses sparse, so we must use __force here to + * keep it from complaining. + */ + fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl((__force __be32)fh->fh_fsid[0]), + ntohl((__force __be32)fh->fh_fsid[1]))); fh->fh_fsid[1] = fh->fh_fsid[2]; } data_left -= len; @@ -539,8 +546,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, dentry); fhp->fh_dentry = dget(dentry); /* our internal copy */ - fhp->fh_export = exp; - cache_get(&exp->h); + fhp->fh_export = exp_get(exp); if (fhp->fh_handle.fh_version == 0xca) { /* old style filehandle please */ diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 2e89e70ac15c..08236d70c667 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -73,8 +73,15 @@ enum fsid_source { extern enum fsid_source fsid_source(struct svc_fh *fhp); -/* This might look a little large to "inline" but in all calls except +/* + * This might look a little large to "inline" but in all calls except * one, 'vers' is constant so moste of the function disappears. + * + * In some cases the values are considered to be host endian and in + * others, net endian. fsidv is always considered to be u32 as the + * callers don't know which it will be. So we must use __force to keep + * sparse from complaining. 
Since these values are opaque to the + * client, that shouldn't be a problem. */ static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino, u32 fsid, unsigned char *uuid) @@ -82,7 +89,7 @@ static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino, u32 *up; switch(vers) { case FSID_DEV: - fsidv[0] = htonl((MAJOR(dev)<<16) | + fsidv[0] = (__force __u32)htonl((MAJOR(dev)<<16) | MINOR(dev)); fsidv[1] = ino_t_to_u32(ino); break; @@ -90,8 +97,8 @@ static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino, fsidv[0] = fsid; break; case FSID_MAJOR_MINOR: - fsidv[0] = htonl(MAJOR(dev)); - fsidv[1] = htonl(MINOR(dev)); + fsidv[0] = (__force __u32)htonl(MAJOR(dev)); + fsidv[1] = (__force __u32)htonl(MINOR(dev)); fsidv[2] = ino_t_to_u32(ino); break; diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 54c6b3d3cc79..b8680738f588 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -403,12 +403,13 @@ nfsd_proc_symlink(struct svc_rqst *rqstp, struct nfsd_symlinkargs *argp, fh_init(&newfh, NFS_FHSIZE); /* - * Create the link, look up new file and set attrs. 
+ * Crazy hack: the request fits in a page, and already-decoded + * attributes follow argp->tname, so it's safe to just write a + * null to ensure it's null-terminated: */ + argp->tname[argp->tlen] = '\0'; nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen, - argp->tname, argp->tlen, - &newfh, &argp->attrs); - + argp->tname, &newfh); fh_put(&argp->ffh); fh_put(&newfh); @@ -716,6 +717,7 @@ nfserrno (int errno) { nfserr_noent, -ENOENT }, { nfserr_io, -EIO }, { nfserr_nxio, -ENXIO }, + { nfserr_fbig, -E2BIG }, { nfserr_acces, -EACCES }, { nfserr_exist, -EEXIST }, { nfserr_xdev, -EXDEV }, @@ -743,6 +745,7 @@ nfserrno (int errno) { nfserr_notsupp, -EOPNOTSUPP }, { nfserr_toosmall, -ETOOSMALL }, { nfserr_serverfault, -ESERVERFAULT }, + { nfserr_serverfault, -ENFILE }, }; int i; @@ -750,7 +753,7 @@ nfserrno (int errno) if (nfs_errtbl[i].syserr == errno) return nfs_errtbl[i].nfserr; } - printk (KERN_INFO "nfsd: non-standard errno: %d\n", errno); + WARN(1, "nfsd: non-standard errno: %d\n", errno); return nfserr_io; } diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 1879e43f2868..752d56bbe0ba 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -221,7 +221,8 @@ static int nfsd_startup_generic(int nrservs) */ ret = nfsd_racache_init(2*nrservs); if (ret) - return ret; + goto dec_users; + ret = nfs4_state_start(); if (ret) goto out_racache; @@ -229,6 +230,8 @@ static int nfsd_startup_generic(int nrservs) out_racache: nfsd_racache_shutdown(); +dec_users: + nfsd_users--; return ret; } @@ -405,6 +408,7 @@ int nfsd_create_serv(struct net *net) if (nn->nfsd_serv == NULL) return -ENOMEM; + nn->nfsd_serv->sv_maxconn = nn->max_connections; error = svc_bind(nn->nfsd_serv, net); if (error < 0) { svc_destroy(nn->nfsd_serv); @@ -469,8 +473,7 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) /* enforce a global maximum number of threads */ tot = 0; for (i = 0; i < n; i++) { - if (nthreads[i] > NFSD_MAXSERVS) - nthreads[i] = NFSD_MAXSERVS; + nthreads[i] 
= min(nthreads[i], NFSD_MAXSERVS); tot += nthreads[i]; } if (tot > NFSD_MAXSERVS) { @@ -519,11 +522,11 @@ nfsd_svc(int nrservs, struct net *net) mutex_lock(&nfsd_mutex); dprintk("nfsd: creating service\n"); - if (nrservs <= 0) - nrservs = 0; - if (nrservs > NFSD_MAXSERVS) - nrservs = NFSD_MAXSERVS; + + nrservs = max(nrservs, 0); + nrservs = min(nrservs, NFSD_MAXSERVS); error = 0; + if (nrservs == 0 && nn->nfsd_serv == NULL) goto out; @@ -564,6 +567,7 @@ nfsd(void *vrqstp) struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp; struct svc_xprt *perm_sock = list_entry(rqstp->rq_server->sv_permsocks.next, typeof(struct svc_xprt), xpt_list); struct net *net = perm_sock->xpt_net; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); int err; /* Lock module and set up kernel thread */ @@ -597,6 +601,9 @@ nfsd(void *vrqstp) * The main request loop */ for (;;) { + /* Update sv_maxconn if it has changed */ + rqstp->rq_server->sv_maxconn = nn->max_connections; + /* * Find a socket with data available and call its * recvfrom routine. diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 1ac306b769df..412d7061f9e5 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -257,8 +257,7 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, len = args->count = ntohl(*p++); p++; /* totalcount - unused */ - if (len > NFSSVC_MAXBLKSIZE_V2) - len = NFSSVC_MAXBLKSIZE_V2; + len = min_t(unsigned int, len, NFSSVC_MAXBLKSIZE_V2); /* set up somewhere to store response. 
* We take pages, put them on reslist and include in iovec @@ -268,7 +267,7 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, struct page *p = *(rqstp->rq_next_page++); rqstp->rq_vec[v].iov_base = page_address(p); - rqstp->rq_vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE; + rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE); len -= rqstp->rq_vec[v].iov_len; v++; } @@ -400,9 +399,7 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p, return 0; args->cookie = ntohl(*p++); args->count = ntohl(*p++); - if (args->count > PAGE_SIZE) - args->count = PAGE_SIZE; - + args->count = min_t(u32, args->count, PAGE_SIZE); args->buffer = page_address(*(rqstp->rq_next_page++)); return xdr_argsize_check(rqstp, p); @@ -516,10 +513,11 @@ nfssvc_encode_entry(void *ccdv, const char *name, } if (cd->offset) *cd->offset = htonl(offset); - if (namlen > NFS2_MAXNAMLEN) - namlen = NFS2_MAXNAMLEN;/* truncate filename */ + /* truncate filename */ + namlen = min(namlen, NFS2_MAXNAMLEN); slen = XDR_QUADLEN(namlen); + if ((buflen = cd->buflen - slen - 4) < 0) { cd->common.err = nfserr_toosmall; return -EINVAL; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 374c66283ac5..4a89e00d7461 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -72,7 +72,13 @@ struct nfsd4_callback { bool cb_done; }; +/* + * A core object that represents a "common" stateid. These are generally + * embedded within the different (more specific) stateid objects and contain + * fields that are of general use to any stateid. 
+ */ struct nfs4_stid { + atomic_t sc_count; #define NFS4_OPEN_STID 1 #define NFS4_LOCK_STID 2 #define NFS4_DELEG_STID 4 @@ -80,22 +86,43 @@ struct nfs4_stid { #define NFS4_CLOSED_STID 8 /* For a deleg stateid kept around only to process free_stateid's: */ #define NFS4_REVOKED_DELEG_STID 16 +#define NFS4_CLOSED_DELEG_STID 32 unsigned char sc_type; stateid_t sc_stateid; struct nfs4_client *sc_client; + struct nfs4_file *sc_file; + void (*sc_free)(struct nfs4_stid *); }; +/* + * Represents a delegation stateid. The nfs4_client holds references to these + * and they are put when it is being destroyed or when the delegation is + * returned by the client: + * + * o 1 reference as long as a delegation is still in force (taken when it's + * alloc'd, put when it's returned or revoked) + * + * o 1 reference as long as a recall rpc is in progress (taken when the lease + * is broken, put when the rpc exits) + * + * o 1 more ephemeral reference for each nfsd thread currently doing something + * with that delegation without holding the cl_lock + * + * If the server attempts to recall a delegation and the client doesn't do so + * before a timeout, the server may also revoke the delegation. In that case, + * the object will either be destroyed (v4.0) or moved to a per-client list of + * revoked delegations (v4.1+). + * + * This object is a superset of the nfs4_stid. + */ struct nfs4_delegation { struct nfs4_stid dl_stid; /* must be first field */ struct list_head dl_perfile; struct list_head dl_perclnt; struct list_head dl_recall_lru; /* delegation recalled */ - atomic_t dl_count; /* ref count */ - struct nfs4_file *dl_file; u32 dl_type; time_t dl_time; /* For recall: */ - struct knfsd_fh dl_fh; int dl_retries; struct nfsd4_callback dl_recall; }; @@ -194,6 +221,11 @@ struct nfsd4_conn { unsigned char cn_flags; }; +/* + * Representation of a v4.1+ session. These are refcounted in a similar fashion + * to the nfs4_client. 
References are only taken when the server is actively + * working on the object (primarily during the processing of compounds). + */ struct nfsd4_session { atomic_t se_ref; struct list_head se_hash; /* hash by sessionid */ @@ -212,8 +244,6 @@ struct nfsd4_session { struct nfsd4_slot *se_slots[]; /* forward channel slots */ }; -extern void nfsd4_put_session(struct nfsd4_session *ses); - /* formatted contents of nfs4_sessionid */ struct nfsd4_sessionid { clientid_t clientid; @@ -225,17 +255,35 @@ struct nfsd4_sessionid { /* * struct nfs4_client - one per client. Clientids live here. - * o Each nfs4_client is hashed by clientid. * - * o Each nfs4_clients is also hashed by name - * (the opaque quantity initially sent by the client to identify itself). + * The initial object created by an NFS client using SETCLIENTID (for NFSv4.0) + * or EXCHANGE_ID (for NFSv4.1+). These objects are refcounted and timestamped. + * Each nfsd_net_ns object contains a set of these and they are tracked via + * short and long form clientid. They are hashed and searched for under the + * per-nfsd_net client_lock spinlock. + * + * References to it are only held during the processing of compounds, and in + * certain other operations. In their "resting state" they have a refcount of + * 0. If they are not renewed within a lease period, they become eligible for + * destruction by the laundromat. + * + * These objects can also be destroyed prematurely by the fault injection code, + * or if the client sends certain forms of SETCLIENTID or EXCHANGE_ID updates. + * Care is taken *not* to do this however when the objects have an elevated + * refcount. + * + * o Each nfs4_client is hashed by clientid + * + * o Each nfs4_clients is also hashed by name (the opaque quantity initially + * sent by the client to identify itself). 
* - * o cl_perclient list is used to ensure no dangling stateowner references - * when we expire the nfs4_client + * o cl_perclient list is used to ensure no dangling stateowner references + * when we expire the nfs4_client */ struct nfs4_client { struct list_head cl_idhash; /* hash by cl_clientid.id */ struct rb_node cl_namenode; /* link into by-name trees */ + struct list_head *cl_ownerstr_hashtbl; struct list_head cl_openowners; struct idr cl_stateids; /* stateid lookup */ struct list_head cl_delegations; @@ -329,21 +377,43 @@ struct nfs4_replay { unsigned int rp_buflen; char *rp_buf; struct knfsd_fh rp_openfh; + struct mutex rp_mutex; char rp_ibuf[NFSD4_REPLAY_ISIZE]; }; +struct nfs4_stateowner; + +struct nfs4_stateowner_operations { + void (*so_unhash)(struct nfs4_stateowner *); + void (*so_free)(struct nfs4_stateowner *); +}; + +/* + * A core object that represents either an open or lock owner. The object and + * lock owner objects have one of these embedded within them. Refcounts and + * other fields common to both owner types are contained within these + * structures. + */ struct nfs4_stateowner { - struct list_head so_strhash; /* hash by op_name */ - struct list_head so_stateids; - struct nfs4_client * so_client; - /* after increment in ENCODE_SEQID_OP_TAIL, represents the next + struct list_head so_strhash; + struct list_head so_stateids; + struct nfs4_client *so_client; + const struct nfs4_stateowner_operations *so_ops; + /* after increment in nfsd4_bump_seqid, represents the next * sequence id expected from the client: */ - u32 so_seqid; - struct xdr_netobj so_owner; /* open owner name */ - struct nfs4_replay so_replay; - bool so_is_open_owner; + atomic_t so_count; + u32 so_seqid; + struct xdr_netobj so_owner; /* open owner name */ + struct nfs4_replay so_replay; + bool so_is_open_owner; }; +/* + * When a file is opened, the client provides an open state owner opaque string + * that indicates the "owner" of that open. These objects are refcounted. 
+ * References to it are held by each open state associated with it. This object + * is a superset of the nfs4_stateowner struct. + */ struct nfs4_openowner { struct nfs4_stateowner oo_owner; /* must be first field */ struct list_head oo_perclient; @@ -358,15 +428,17 @@ struct nfs4_openowner { struct nfs4_ol_stateid *oo_last_closed_stid; time_t oo_time; /* time of placement on so_close_lru */ #define NFS4_OO_CONFIRMED 1 -#define NFS4_OO_NEW 4 unsigned char oo_flags; }; +/* + * Represents a generic "lockowner". Similar to an openowner. References to it + * are held by the lock stateids that are created on its behalf. This object is + * a superset of the nfs4_stateowner struct (or would be if it needed any extra + * fields). + */ struct nfs4_lockowner { struct nfs4_stateowner lo_owner; /* must be first element */ - struct list_head lo_owner_ino_hash; /* hash by owner,file */ - struct list_head lo_perstateid; - struct list_head lo_list; /* for temporary uses */ }; static inline struct nfs4_openowner * openowner(struct nfs4_stateowner *so) @@ -379,9 +451,17 @@ static inline struct nfs4_lockowner * lockowner(struct nfs4_stateowner *so) return container_of(so, struct nfs4_lockowner, lo_owner); } -/* nfs4_file: a file opened by some number of (open) nfs4_stateowners. */ +/* + * nfs4_file: a file opened by some number of (open) nfs4_stateowners. + * + * These objects are global. nfsd only keeps one instance of a nfs4_file per + * inode (though it may keep multiple file descriptors open per inode). These + * are tracked in the file_hashtbl which is protected by the state_lock + * spinlock. + */ struct nfs4_file { atomic_t fi_ref; + spinlock_t fi_lock; struct hlist_node fi_hash; /* hash by "struct inode *" */ struct list_head fi_stateids; struct list_head fi_delegations; @@ -395,49 +475,36 @@ struct nfs4_file { * + 1 to both of the above if NFS4_SHARE_ACCESS_BOTH is set. 
*/ atomic_t fi_access[2]; + u32 fi_share_deny; struct file *fi_deleg_file; struct file_lock *fi_lease; atomic_t fi_delegees; - struct inode *fi_inode; + struct knfsd_fh fi_fhandle; bool fi_had_conflict; }; -/* XXX: for first cut may fall back on returning file that doesn't work - * at all? */ -static inline struct file *find_writeable_file(struct nfs4_file *f) -{ - if (f->fi_fds[O_WRONLY]) - return f->fi_fds[O_WRONLY]; - return f->fi_fds[O_RDWR]; -} - -static inline struct file *find_readable_file(struct nfs4_file *f) -{ - if (f->fi_fds[O_RDONLY]) - return f->fi_fds[O_RDONLY]; - return f->fi_fds[O_RDWR]; -} - -static inline struct file *find_any_file(struct nfs4_file *f) -{ - if (f->fi_fds[O_RDWR]) - return f->fi_fds[O_RDWR]; - else if (f->fi_fds[O_WRONLY]) - return f->fi_fds[O_WRONLY]; - else - return f->fi_fds[O_RDONLY]; -} - -/* "ol" stands for "Open or Lock". Better suggestions welcome. */ +/* + * A generic struct representing either a open or lock stateid. The nfs4_client + * holds a reference to each of these objects, and they in turn hold a + * reference to their respective stateowners. The client's reference is + * released in response to a close or unlock (depending on whether it's an open + * or lock stateid) or when the client is being destroyed. + * + * In the case of v4.0 open stateids, these objects are preserved for a little + * while after close in order to handle CLOSE replays. Those are eventually + * reclaimed via a LRU scheme by the laundromat. + * + * This object is a superset of the nfs4_stid. "ol" stands for "Open or Lock". + * Better suggestions welcome. 
+ */ struct nfs4_ol_stateid { struct nfs4_stid st_stid; /* must be first field */ struct list_head st_perfile; struct list_head st_perstateowner; - struct list_head st_lockowners; + struct list_head st_locks; struct nfs4_stateowner * st_stateowner; - struct nfs4_file * st_file; - unsigned long st_access_bmap; - unsigned long st_deny_bmap; + unsigned char st_access_bmap; + unsigned char st_deny_bmap; struct nfs4_ol_stateid * st_openstp; }; @@ -456,15 +523,16 @@ struct nfsd_net; extern __be32 nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, stateid_t *stateid, int flags, struct file **filp); -extern void nfs4_lock_state(void); -extern void nfs4_unlock_state(void); +void nfs4_put_stid(struct nfs4_stid *s); void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *); extern void nfs4_release_reclaim(struct nfsd_net *); extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir, struct nfsd_net *nn); -extern __be32 nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn); +extern __be32 nfs4_check_open_reclaim(clientid_t *clid, + struct nfsd4_compound_state *cstate, struct nfsd_net *nn); extern int set_callback_cred(void); -extern void nfsd4_init_callback(struct nfsd4_callback *); +void nfsd4_run_cb_null(struct work_struct *w); +void nfsd4_run_cb_recall(struct work_struct *w); extern void nfsd4_probe_callback(struct nfs4_client *clp); extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); @@ -472,11 +540,10 @@ extern void nfsd4_cb_recall(struct nfs4_delegation *dp); extern int nfsd4_create_callback_queue(void); extern void nfsd4_destroy_callback_queue(void); extern void nfsd4_shutdown_callback(struct nfs4_client *); -extern void nfs4_put_delegation(struct nfs4_delegation *dp); +extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp); extern struct nfs4_client_reclaim 
*nfs4_client_to_reclaim(const char *name, struct nfsd_net *nn); extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn); -extern void put_client_renew(struct nfs4_client *clp); /* nfs4recover operations */ extern int nfsd4_client_tracking_init(struct net *net); @@ -490,19 +557,24 @@ extern void nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time); #ifdef CONFIG_NFSD_FAULT_INJECTION int nfsd_fault_inject_init(void); void nfsd_fault_inject_cleanup(void); -u64 nfsd_for_n_state(u64, u64 (*)(struct nfs4_client *, u64)); -struct nfs4_client *nfsd_find_client(struct sockaddr_storage *, size_t); - -u64 nfsd_forget_client(struct nfs4_client *, u64); -u64 nfsd_forget_client_locks(struct nfs4_client*, u64); -u64 nfsd_forget_client_openowners(struct nfs4_client *, u64); -u64 nfsd_forget_client_delegations(struct nfs4_client *, u64); -u64 nfsd_recall_client_delegations(struct nfs4_client *, u64); - -u64 nfsd_print_client(struct nfs4_client *, u64); -u64 nfsd_print_client_locks(struct nfs4_client *, u64); -u64 nfsd_print_client_openowners(struct nfs4_client *, u64); -u64 nfsd_print_client_delegations(struct nfs4_client *, u64); + +u64 nfsd_inject_print_clients(void); +u64 nfsd_inject_forget_client(struct sockaddr_storage *, size_t); +u64 nfsd_inject_forget_clients(u64); + +u64 nfsd_inject_print_locks(void); +u64 nfsd_inject_forget_client_locks(struct sockaddr_storage *, size_t); +u64 nfsd_inject_forget_locks(u64); + +u64 nfsd_inject_print_openowners(void); +u64 nfsd_inject_forget_client_openowners(struct sockaddr_storage *, size_t); +u64 nfsd_inject_forget_openowners(u64); + +u64 nfsd_inject_print_delegations(void); +u64 nfsd_inject_forget_client_delegations(struct sockaddr_storage *, size_t); +u64 nfsd_inject_forget_delegations(u64); +u64 nfsd_inject_recall_client_delegations(struct sockaddr_storage *, size_t); +u64 nfsd_inject_recall_delegations(u64); #else /* CONFIG_NFSD_FAULT_INJECTION */ static inline int nfsd_fault_inject_init(void) { return 
0; } static inline void nfsd_fault_inject_cleanup(void) {} diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index d49c778faecb..6ab077bb897e 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -189,8 +189,7 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name); dparent = fhp->fh_dentry; - exp = fhp->fh_export; - exp_get(exp); + exp = exp_get(fhp->fh_export); /* Lookup the name, but don't follow links */ if (isdotent(name, len)) { @@ -464,7 +463,7 @@ out_put_write_access: if (size_change) put_write_access(inode); if (!err) - commit_metadata(fhp); + err = nfserrno(commit_metadata(fhp)); out: return err; } @@ -820,7 +819,8 @@ static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe, return __splice_from_pipe(pipe, sd, nfsd_splice_actor); } -__be32 nfsd_finish_read(struct file *file, unsigned long *count, int host_err) +static __be32 +nfsd_finish_read(struct file *file, unsigned long *count, int host_err) { if (host_err >= 0) { nfsdstats.io_read += host_err; @@ -831,7 +831,7 @@ __be32 nfsd_finish_read(struct file *file, unsigned long *count, int host_err) return nfserrno(host_err); } -int nfsd_splice_read(struct svc_rqst *rqstp, +__be32 nfsd_splice_read(struct svc_rqst *rqstp, struct file *file, loff_t offset, unsigned long *count) { struct splice_desc sd = { @@ -847,7 +847,7 @@ int nfsd_splice_read(struct svc_rqst *rqstp, return nfsd_finish_read(file, count, host_err); } -int nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen, +__be32 nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen, unsigned long *count) { mm_segment_t oldfs; @@ -1121,7 +1121,8 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp, iap->ia_valid &= ~(ATTR_UID|ATTR_GID); if (iap->ia_valid) return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); - return 0; + /* Callers expect file metadata to be committed here */ + return nfserrno(commit_metadata(resfhp)); } /* 
HPUX client sometimes creates a file in mode 000, and sets size to 0. @@ -1253,9 +1254,10 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, err = nfsd_create_setattr(rqstp, resfhp, iap); /* - * nfsd_setattr already committed the child. Transactional filesystems - * had a chance to commit changes for both parent and child - * simultaneously making the following commit_metadata a noop. + * nfsd_create_setattr already committed the child. Transactional + * filesystems had a chance to commit changes for both parent and + * child * simultaneously making the following commit_metadata a + * noop. */ err2 = nfserrno(commit_metadata(fhp)); if (err2) @@ -1426,7 +1428,8 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, err = nfsd_create_setattr(rqstp, resfhp, iap); /* - * nfsd_setattr already committed the child (and possibly also the parent). + * nfsd_create_setattr already committed the child + * (and possibly also the parent). */ if (!err) err = nfserrno(commit_metadata(fhp)); @@ -1504,16 +1507,15 @@ out_nfserr: __be32 nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *fname, int flen, - char *path, int plen, - struct svc_fh *resfhp, - struct iattr *iap) + char *path, + struct svc_fh *resfhp) { struct dentry *dentry, *dnew; __be32 err, cerr; int host_err; err = nfserr_noent; - if (!flen || !plen) + if (!flen || path[0] == '\0') goto out; err = nfserr_exist; if (isdotent(fname, flen)) @@ -1534,18 +1536,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, if (IS_ERR(dnew)) goto out_nfserr; - if (unlikely(path[plen] != 0)) { - char *path_alloced = kmalloc(plen+1, GFP_KERNEL); - if (path_alloced == NULL) - host_err = -ENOMEM; - else { - strncpy(path_alloced, path, plen); - path_alloced[plen] = 0; - host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced); - kfree(path_alloced); - } - } else - host_err = vfs_symlink(dentry->d_inode, dnew, path); + host_err = vfs_symlink(dentry->d_inode, dnew, path); err = nfserrno(host_err); if 
(!err) err = nfserrno(commit_metadata(fhp)); @@ -2093,8 +2084,7 @@ nfsd_racache_init(int cache_size) if (raparm_hash[0].pb_head) return 0; nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE); - if (nperbucket < 2) - nperbucket = 2; + nperbucket = max(2, nperbucket); cache_size = nperbucket * RAPARM_HASH_SIZE; dprintk("nfsd: allocating %d readahead buffers.\n", cache_size); diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 91b6ae3f658b..c2ff3f14e5f6 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -74,9 +74,9 @@ struct raparms; __be32 nfsd_get_tmp_read_open(struct svc_rqst *, struct svc_fh *, struct file **, struct raparms **); void nfsd_put_tmp_read_open(struct file *, struct raparms *); -int nfsd_splice_read(struct svc_rqst *, +__be32 nfsd_splice_read(struct svc_rqst *, struct file *, loff_t, unsigned long *); -int nfsd_readv(struct file *, loff_t, struct kvec *, int, +__be32 nfsd_readv(struct file *, loff_t, struct kvec *, int, unsigned long *); __be32 nfsd_read(struct svc_rqst *, struct svc_fh *, loff_t, struct kvec *, int, unsigned long *); @@ -85,8 +85,8 @@ __be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *, __be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *, char *, int *); __be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *, - char *name, int len, char *path, int plen, - struct svc_fh *res, struct iattr *); + char *name, int len, char *path, + struct svc_fh *res); __be32 nfsd_link(struct svc_rqst *, struct svc_fh *, char *, int, struct svc_fh *); __be32 nfsd_rename(struct svc_rqst *, diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 18cbb6d9c8a9..465e7799742a 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -55,6 +55,7 @@ struct nfsd4_compound_state { struct svc_fh current_fh; struct svc_fh save_fh; struct nfs4_stateowner *replay_owner; + struct nfs4_client *clp; /* For sessions DRC */ struct nfsd4_session *session; struct nfsd4_slot *slot; @@ -107,8 +108,8 @@ struct nfsd4_create { u32 cr_type; /* request */ union { /* request 
*/ struct { - u32 namelen; - char *name; + u32 datalen; + char *data; } link; /* NF4LNK */ struct { u32 specdata1; @@ -121,8 +122,8 @@ struct nfsd4_create { struct nfs4_acl *cr_acl; struct xdr_netobj cr_label; }; -#define cr_linklen u.link.namelen -#define cr_linkname u.link.name +#define cr_datalen u.link.datalen +#define cr_data u.link.data #define cr_specdata1 u.dev.specdata1 #define cr_specdata2 u.dev.specdata2 @@ -478,6 +479,14 @@ struct nfsd4_op { bool nfsd4_cache_this_op(struct nfsd4_op *); +/* + * Memory needed just for the duration of processing one compound: + */ +struct svcxdr_tmpbuf { + struct svcxdr_tmpbuf *next; + char buf[]; +}; + struct nfsd4_compoundargs { /* scratch variables for XDR decode */ __be32 * p; @@ -486,11 +495,7 @@ struct nfsd4_compoundargs { int pagelen; __be32 tmp[8]; __be32 * tmpp; - struct tmpbuf { - struct tmpbuf *next; - void (*release)(const void *); - void *buf; - } *to_free; + struct svcxdr_tmpbuf *to_free; struct svc_rqst *rqstp; @@ -574,7 +579,6 @@ extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp, extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_setclientid_confirm *setclientid_confirm); -extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp); extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_exchange_id *); extern __be32 nfsd4_backchannel_ctl(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_backchannel_ctl *); @@ -585,6 +589,7 @@ extern __be32 nfsd4_create_session(struct svc_rqst *, extern __be32 nfsd4_sequence(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_sequence *); +extern void nfsd4_sequence_done(struct nfsd4_compoundres *resp); extern __be32 nfsd4_destroy_session(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_destroy_session *); @@ -594,7 +599,9 @@ extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *, struct nfsd4_open *open, struct 
nfsd_net *nn); extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open); -extern void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status); +extern void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate); +extern void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate, + struct nfsd4_open *open, __be32 status); extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_open_confirm *oc); extern __be32 nfsd4_close(struct svc_rqst *rqstp, @@ -625,6 +632,7 @@ extern __be32 nfsd4_test_stateid(struct svc_rqst *rqstp, extern __be32 nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_free_stateid *free_stateid); extern void nfsd4_bump_seqid(struct nfsd4_compound_state *, __be32 nfserr); + #endif /* |