diff options
Diffstat (limited to 'fs/nfs')
40 files changed, 1250 insertions, 712 deletions
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 88e1763e02f3..e2a488d403a6 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -205,3 +205,12 @@ config NFS_DISABLE_UDP_SUPPORT Choose Y here to disable the use of NFS over UDP. NFS over UDP on modern networks (1Gb+) can lead to data corruption caused by fragmentation during high loads. + +config NFS_V4_2_READ_PLUS + bool "NFS: Enable support for the NFSv4.2 READ_PLUS operation" + depends on NFS_V4_2 + default n + help + This is intended for developers only. The READ_PLUS operation has + been shown to have issues under specific conditions and should not + be used in production. diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 08108b6d2fa1..1a96ce28efb0 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -123,11 +123,6 @@ bl_alloc_init_bio(int npg, struct block_device *bdev, sector_t disk_sector, npg = min(npg, BIO_MAX_PAGES); bio = bio_alloc(GFP_NOIO, npg); - if (!bio && (current->flags & PF_MEMALLOC)) { - while (!bio && (npg /= 2)) - bio = bio_alloc(GFP_NOIO, npg); - } - if (bio) { bio->bi_iter.bi_sector = disk_sector; bio_set_dev(bio, bdev); @@ -697,7 +692,7 @@ bl_alloc_lseg(struct pnfs_layout_hdr *lo, struct nfs4_layoutget_res *lgr, xdr_init_decode_pages(&xdr, &buf, lgr->layoutp->pages, lgr->layoutp->len); - xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE); + xdr_set_scratch_page(&xdr, scratch); status = -EIO; p = xdr_inline_decode(&xdr, 4); diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c index dec5880ac6de..acb1d22907da 100644 --- a/fs/nfs/blocklayout/dev.c +++ b/fs/nfs/blocklayout/dev.c @@ -510,7 +510,7 @@ bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, goto out; xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen); - xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE); + xdr_set_scratch_page(&xdr, scratch); p = xdr_inline_decode(&xdr, sizeof(__be32)); if (!p) diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index e61dbc9b86ae..f7786e00a6a7 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -6,10 +6,15 @@ * * NFSv4 callback procedures */ + +#include <linux/errno.h> +#include <linux/math.h> #include <linux/nfs4.h> #include <linux/nfs_fs.h> #include <linux/slab.h> #include <linux/rcupdate.h> +#include <linux/types.h> + #include "nfs4_fs.h" #include "callback.h" #include "delegation.h" diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 79ff172eb1c8..c5348ba81129 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -1060,6 +1060,7 @@ static const struct svc_procedure nfs4_callback_procedures1[] = { .pc_decode = nfs4_decode_void, .pc_encode = nfs4_encode_void, .pc_xdrressize = 1, + .pc_name = "NULL", }, [CB_COMPOUND] = { .pc_func = nfs4_callback_compound, @@ -1067,6 +1068,7 @@ static const struct svc_procedure nfs4_callback_procedures1[] = { .pc_argsize = 256, .pc_ressize = 256, .pc_xdrressize = NFS4_CALLBACK_BUFSIZE, + .pc_name = "COMPOUND", } }; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 4b8cc93913f7..ff5c4d0d6d13 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -571,7 +571,7 @@ static int nfs_start_lockd(struct nfs_server *server) 1 : 0, .net = clp->cl_net, .nlmclnt_ops = clp->cl_nfs_mod->rpc_ops->nlmclnt_ops, - .cred = current_cred(), + .cred = server->cred, }; if (nlm_init.nfs_version > 3) @@ -781,8 +781,8 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); server->dtsize = nfs_block_size(fsinfo->dtpref, NULL); - if (server->dtsize > PAGE_SIZE * NFS_MAX_READDIR_PAGES) - server->dtsize = PAGE_SIZE * NFS_MAX_READDIR_PAGES; + if (server->dtsize > NFS_MAX_FILE_IO_SIZE) + server->dtsize = NFS_MAX_FILE_IO_SIZE; if (server->dtsize > server->rsize) server->dtsize = server->rsize; @@ -985,7 +985,7 @@ struct nfs_server *nfs_create_server(struct fs_context *fc) if (!server) return ERR_PTR(-ENOMEM); - server->cred = get_cred(current_cred()); + server->cred = get_cred(fc->cred); error = -ENOMEM; fattr = nfs_alloc_fattr(); diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 816e1427f17e..04bf8066980c 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -1011,22 +1011,24 @@ nfs_delegation_find_inode_server(struct nfs_server *server, const struct nfs_fh *fhandle) { struct nfs_delegation *delegation; - struct inode *freeme, *res = NULL; + struct super_block *freeme = NULL; + struct inode *res = NULL; list_for_each_entry_rcu(delegation, &server->delegations, super_list) { spin_lock(&delegation->lock); if (delegation->inode != NULL && !test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) && nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) { - freeme = igrab(delegation->inode); - if (freeme && nfs_sb_active(freeme->i_sb)) - res = freeme; + if (nfs_sb_active(server->super)) { + freeme = server->super; + res = igrab(delegation->inode); + } spin_unlock(&delegation->lock); if (res != NULL) return res; if (freeme) { rcu_read_unlock(); - iput(freeme); + nfs_sb_deactive(freeme); rcu_read_lock(); } return ERR_PTR(-EAGAIN); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index cb52db9a0cfb..19a9f434442f 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -68,7 +68,7 @@ const struct address_space_operations nfs_dir_aops = { .freepage = nfs_readdir_clear_array, }; -static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir, const struct cred *cred) +static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir) { struct nfs_inode *nfsi = NFS_I(dir); struct nfs_open_dir_context *ctx; @@ -78,7 +78,6 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir ctx->attr_gencount = nfsi->attr_gencount; ctx->dir_cookie = 0; ctx->dup_cookie = 0; - ctx->cred = get_cred(cred); spin_lock(&dir->i_lock); if (list_empty(&nfsi->open_files) && (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER)) @@ -96,7 +95,6 @@ static void put_nfs_open_dir_context(struct inode *dir, struct nfs_open_dir_cont spin_lock(&dir->i_lock); list_del(&ctx->list); spin_unlock(&dir->i_lock); - put_cred(ctx->cred); kfree(ctx); } @@ -113,7 +111,7 @@ nfs_opendir(struct inode *inode, struct file *filp) nfs_inc_stats(inode, NFSIOS_VFSOPEN); - ctx = alloc_nfs_open_dir_context(inode, current_cred()); + ctx = alloc_nfs_open_dir_context(inode); if (IS_ERR(ctx)) { res = PTR_ERR(ctx); goto out; @@ -133,43 +131,55 @@ nfs_closedir(struct inode *inode, struct file *filp) struct nfs_cache_array_entry { u64 cookie; u64 ino; - struct qstr string; + const char *name; + unsigned int name_len; unsigned char d_type; }; struct nfs_cache_array { - int size; - int eof_index; u64 last_cookie; + unsigned int size; + unsigned char page_full : 1, + page_is_eof : 1, + cookies_are_ordered : 1; struct nfs_cache_array_entry array[]; }; -typedef struct { +struct nfs_readdir_descriptor { struct file *file; struct page *page; struct dir_context *ctx; - unsigned long page_index; - u64 *dir_cookie; + pgoff_t page_index; + u64 dir_cookie; u64 last_cookie; + u64 dup_cookie; loff_t current_index; loff_t prev_index; + __be32 verf[NFS_DIR_VERIFIER_SIZE]; unsigned long dir_verifier; unsigned long timestamp; unsigned long gencount; + unsigned long attr_gencount; unsigned int cache_entry_index; + signed char duped; bool plus; bool eof; -} nfs_readdir_descriptor_t; +}; -static -void nfs_readdir_init_array(struct page *page) +static void nfs_readdir_array_init(struct nfs_cache_array *array) +{ + memset(array, 0, sizeof(struct nfs_cache_array)); +} + +static void nfs_readdir_page_init_array(struct page *page, u64 last_cookie) { struct nfs_cache_array *array; array = kmap_atomic(page); - memset(array, 0, sizeof(struct nfs_cache_array)); - array->eof_index = -1; + nfs_readdir_array_init(array); + array->last_cookie = last_cookie; + array->cookies_are_ordered = 1; kunmap_atomic(array); } @@ -184,61 +194,177 @@ void nfs_readdir_clear_array(struct page *page) array = kmap_atomic(page); for (i = 0; i < array->size; i++) - kfree(array->array[i].string.name); - array->size = 0; + kfree(array->array[i].name); + nfs_readdir_array_init(array); kunmap_atomic(array); } +static struct page * +nfs_readdir_page_array_alloc(u64 last_cookie, gfp_t gfp_flags) +{ + struct page *page = alloc_page(gfp_flags); + if (page) + nfs_readdir_page_init_array(page, last_cookie); + return page; +} + +static void nfs_readdir_page_array_free(struct page *page) +{ + if (page) { + nfs_readdir_clear_array(page); + put_page(page); + } +} + +static void nfs_readdir_array_set_eof(struct nfs_cache_array *array) +{ + array->page_is_eof = 1; + array->page_full = 1; +} + +static bool nfs_readdir_array_is_full(struct nfs_cache_array *array) +{ + return array->page_full; +} + /* * the caller is responsible for freeing qstr.name * when called by nfs_readdir_add_to_array, the strings will be freed in * nfs_clear_readdir_array() */ -static -int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int len) +static const char *nfs_readdir_copy_name(const char *name, unsigned int len) { - string->len = len; - string->name = kmemdup_nul(name, len, GFP_KERNEL); - if (string->name == NULL) - return -ENOMEM; + const char *ret = kmemdup_nul(name, len, GFP_KERNEL); + /* * Avoid a kmemleak false positive. The pointer to the name is stored * in a page cache page which kmemleak does not scan. */ - kmemleak_not_leak(string->name); - string->hash = full_name_hash(NULL, name, len); + if (ret != NULL) + kmemleak_not_leak(ret); + return ret; +} + +/* + * Check that the next array entry lies entirely within the page bounds + */ +static int nfs_readdir_array_can_expand(struct nfs_cache_array *array) +{ + struct nfs_cache_array_entry *cache_entry; + + if (array->page_full) + return -ENOSPC; + cache_entry = &array->array[array->size + 1]; + if ((char *)cache_entry - (char *)array > PAGE_SIZE) { + array->page_full = 1; + return -ENOSPC; + } return 0; } static int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page) { - struct nfs_cache_array *array = kmap(page); + struct nfs_cache_array *array; struct nfs_cache_array_entry *cache_entry; + const char *name; int ret; - cache_entry = &array->array[array->size]; + name = nfs_readdir_copy_name(entry->name, entry->len); + if (!name) + return -ENOMEM; - /* Check that this entry lies within the page bounds */ - ret = -ENOSPC; - if ((char *)&cache_entry[1] - (char *)page_address(page) > PAGE_SIZE) + array = kmap_atomic(page); + ret = nfs_readdir_array_can_expand(array); + if (ret) { + kfree(name); goto out; + } + cache_entry = &array->array[array->size]; cache_entry->cookie = entry->prev_cookie; cache_entry->ino = entry->ino; cache_entry->d_type = entry->d_type; - ret = nfs_readdir_make_qstr(&cache_entry->string, entry->name, entry->len); - if (ret) - goto out; + cache_entry->name_len = entry->len; + cache_entry->name = name; array->last_cookie = entry->cookie; + if (array->last_cookie <= cache_entry->cookie) + array->cookies_are_ordered = 0; array->size++; if (entry->eof != 0) - array->eof_index = array->size; + nfs_readdir_array_set_eof(array); out: - kunmap(page); + kunmap_atomic(array); return ret; } +static struct page *nfs_readdir_page_get_locked(struct address_space *mapping, + pgoff_t index, u64 last_cookie) +{ + struct page *page; + + page = grab_cache_page(mapping, index); + if (page && !PageUptodate(page)) { + nfs_readdir_page_init_array(page, last_cookie); + if (invalidate_inode_pages2_range(mapping, index + 1, -1) < 0) + nfs_zap_mapping(mapping->host, mapping); + SetPageUptodate(page); + } + + return page; +} + +static u64 nfs_readdir_page_last_cookie(struct page *page) +{ + struct nfs_cache_array *array; + u64 ret; + + array = kmap_atomic(page); + ret = array->last_cookie; + kunmap_atomic(array); + return ret; +} + +static bool nfs_readdir_page_needs_filling(struct page *page) +{ + struct nfs_cache_array *array; + bool ret; + + array = kmap_atomic(page); + ret = !nfs_readdir_array_is_full(array); + kunmap_atomic(array); + return ret; +} + +static void nfs_readdir_page_set_eof(struct page *page) +{ + struct nfs_cache_array *array; + + array = kmap_atomic(page); + nfs_readdir_array_set_eof(array); + kunmap_atomic(array); +} + +static void nfs_readdir_page_unlock_and_put(struct page *page) +{ + unlock_page(page); + put_page(page); +} + +static struct page *nfs_readdir_page_get_next(struct address_space *mapping, + pgoff_t index, u64 cookie) +{ + struct page *page; + + page = nfs_readdir_page_get_locked(mapping, index, cookie); + if (page) { + if (nfs_readdir_page_last_cookie(page) == cookie) + return page; + nfs_readdir_page_unlock_and_put(page); + } + return NULL; +} + static inline int is_32bit_api(void) { @@ -258,8 +384,8 @@ bool nfs_readdir_use_cookie(const struct file *filp) return true; } -static -int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc) +static int nfs_readdir_search_for_pos(struct nfs_cache_array *array, + struct nfs_readdir_descriptor *desc) { loff_t diff = desc->ctx->pos - desc->current_index; unsigned int index; @@ -267,13 +393,13 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri if (diff < 0) goto out_eof; if (diff >= array->size) { - if (array->eof_index >= 0) + if (array->page_is_eof) goto out_eof; return -EAGAIN; } index = (unsigned int)diff; - *desc->dir_cookie = array->array[index].cookie; + desc->dir_cookie = array->array[index].cookie; desc->cache_entry_index = index; return 0; out_eof: @@ -290,41 +416,55 @@ nfs_readdir_inode_mapping_valid(struct nfs_inode *nfsi) return !test_bit(NFS_INO_INVALIDATING, &nfsi->flags); } -static -int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc) +static bool nfs_readdir_array_cookie_in_range(struct nfs_cache_array *array, + u64 cookie) +{ + if (!array->cookies_are_ordered) + return true; + /* Optimisation for monotonically increasing cookies */ + if (cookie >= array->last_cookie) + return false; + if (array->size && cookie < array->array[0].cookie) + return false; + return true; +} + +static int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, + struct nfs_readdir_descriptor *desc) { int i; loff_t new_pos; int status = -EAGAIN; + if (!nfs_readdir_array_cookie_in_range(array, desc->dir_cookie)) + goto check_eof; + for (i = 0; i < array->size; i++) { - if (array->array[i].cookie == *desc->dir_cookie) { + if (array->array[i].cookie == desc->dir_cookie) { struct nfs_inode *nfsi = NFS_I(file_inode(desc->file)); - struct nfs_open_dir_context *ctx = desc->file->private_data; new_pos = desc->current_index + i; - if (ctx->attr_gencount != nfsi->attr_gencount || + if (desc->attr_gencount != nfsi->attr_gencount || !nfs_readdir_inode_mapping_valid(nfsi)) { - ctx->duped = 0; - ctx->attr_gencount = nfsi->attr_gencount; + desc->duped = 0; + desc->attr_gencount = nfsi->attr_gencount; } else if (new_pos < desc->prev_index) { - if (ctx->duped > 0 - && ctx->dup_cookie == *desc->dir_cookie) { + if (desc->duped > 0 + && desc->dup_cookie == desc->dir_cookie) { if (printk_ratelimit()) { pr_notice("NFS: directory %pD2 contains a readdir loop." "Please contact your server vendor. " - "The file: %.*s has duplicate cookie %llu\n", - desc->file, array->array[i].string.len, - array->array[i].string.name, *desc->dir_cookie); + "The file: %s has duplicate cookie %llu\n", + desc->file, array->array[i].name, desc->dir_cookie); } status = -ELOOP; goto out; } - ctx->dup_cookie = *desc->dir_cookie; - ctx->duped = -1; + desc->dup_cookie = desc->dir_cookie; + desc->duped = -1; } if (nfs_readdir_use_cookie(desc->file)) - desc->ctx->pos = *desc->dir_cookie; + desc->ctx->pos = desc->dir_cookie; else desc->ctx->pos = new_pos; desc->prev_index = new_pos; @@ -332,24 +472,24 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des return 0; } } - if (array->eof_index >= 0) { +check_eof: + if (array->page_is_eof) { status = -EBADCOOKIE; - if (*desc->dir_cookie == array->last_cookie) + if (desc->dir_cookie == array->last_cookie) desc->eof = true; } out: return status; } -static -int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc) +static int nfs_readdir_search_array(struct nfs_readdir_descriptor *desc) { struct nfs_cache_array *array; int status; - array = kmap(desc->page); + array = kmap_atomic(desc->page); - if (*desc->dir_cookie == 0) + if (desc->dir_cookie == 0) status = nfs_readdir_search_for_pos(array, desc); else status = nfs_readdir_search_for_cookie(array, desc); @@ -359,17 +499,29 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc) desc->current_index += array->size; desc->page_index++; } - kunmap(desc->page); + kunmap_atomic(array); return status; } /* Fill a page with xdr information before transferring to the cache page */ -static -int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc, - struct nfs_entry *entry, struct file *file, struct inode *inode) +static int nfs_readdir_xdr_filler(struct nfs_readdir_descriptor *desc, + __be32 *verf, u64 cookie, + struct page **pages, size_t bufsize, + __be32 *verf_res) { - struct nfs_open_dir_context *ctx = file->private_data; - const struct cred *cred = ctx->cred; + struct inode *inode = file_inode(desc->file); + struct nfs_readdir_arg arg = { + .dentry = file_dentry(desc->file), + .cred = desc->file->f_cred, + .verf = verf, + .cookie = cookie, + .pages = pages, + .page_len = bufsize, + .plus = desc->plus, + }; + struct nfs_readdir_res res = { + .verf = verf_res, + }; unsigned long timestamp, gencount; int error; @@ -377,14 +529,13 @@ int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc, timestamp = jiffies; gencount = nfs_inc_attr_generation_counter(); desc->dir_verifier = nfs_save_change_attribute(inode); - error = NFS_PROTO(inode)->readdir(file_dentry(file), cred, entry->cookie, pages, - NFS_SERVER(inode)->dtsize, desc->plus); + error = NFS_PROTO(inode)->readdir(&arg, &res); if (error < 0) { /* We requested READDIRPLUS, but the server doesn't grok it */ if (error == -ENOTSUPP && desc->plus) { NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS; clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); - desc->plus = false; + desc->plus = arg.plus = false; goto again; } goto error; @@ -395,7 +546,7 @@ error: return error; } -static int xdr_decode(nfs_readdir_descriptor_t *desc, +static int xdr_decode(struct nfs_readdir_descriptor *desc, struct nfs_entry *entry, struct xdr_stream *xdr) { struct inode *inode = file_inode(desc->file); @@ -557,235 +708,263 @@ out: } /* Perform conversion from xdr to cache array */ -static -int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, - struct page **xdr_pages, struct page *page, unsigned int buflen) -{ +static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc, + struct nfs_entry *entry, + struct page **xdr_pages, + unsigned int buflen, + struct page **arrays, + size_t narrays) +{ + struct address_space *mapping = desc->file->f_mapping; struct xdr_stream stream; struct xdr_buf buf; - struct page *scratch; - struct nfs_cache_array *array; - unsigned int count = 0; + struct page *scratch, *new, *page = *arrays; int status; scratch = alloc_page(GFP_KERNEL); if (scratch == NULL) return -ENOMEM; - if (buflen == 0) - goto out_nopages; - xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen); - xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); + xdr_set_scratch_page(&stream, scratch); do { if (entry->label) entry->label->len = NFS4_MAXLABELLEN; status = xdr_decode(desc, entry, &stream); - if (status != 0) { - if (status == -EAGAIN) - status = 0; + if (status != 0) break; - } - - count++; if (desc->plus) nfs_prime_dcache(file_dentry(desc->file), entry, desc->dir_verifier); status = nfs_readdir_add_to_array(entry, page); - if (status != 0) - break; - } while (!entry->eof); + if (status != -ENOSPC) + continue; -out_nopages: - if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) { - array = kmap(page); - array->eof_index = array->size; + if (page->mapping != mapping) { + if (!--narrays) + break; + new = nfs_readdir_page_array_alloc(entry->prev_cookie, + GFP_KERNEL); + if (!new) + break; + arrays++; + *arrays = page = new; + } else { + new = nfs_readdir_page_get_next(mapping, + page->index + 1, + entry->prev_cookie); + if (!new) + break; + if (page != *arrays) + nfs_readdir_page_unlock_and_put(page); + page = new; + } + status = nfs_readdir_add_to_array(entry, page); + } while (!status && !entry->eof); + + switch (status) { + case -EBADCOOKIE: + if (entry->eof) { + nfs_readdir_page_set_eof(page); + status = 0; + } + break; + case -ENOSPC: + case -EAGAIN: status = 0; - kunmap(page); + break; } + if (page != *arrays) + nfs_readdir_page_unlock_and_put(page); + put_page(scratch); return status; } -static -void nfs_readdir_free_pages(struct page **pages, unsigned int npages) +static void nfs_readdir_free_pages(struct page **pages, size_t npages) { - unsigned int i; - for (i = 0; i < npages; i++) - put_page(pages[i]); + while (npages--) + put_page(pages[npages]); + kfree(pages); } /* * nfs_readdir_alloc_pages() will allocate pages that must be freed with a call * to nfs_readdir_free_pages() */ -static -int nfs_readdir_alloc_pages(struct page **pages, unsigned int npages) +static struct page **nfs_readdir_alloc_pages(size_t npages) { - unsigned int i; + struct page **pages; + size_t i; + pages = kmalloc_array(npages, sizeof(*pages), GFP_KERNEL); + if (!pages) + return NULL; for (i = 0; i < npages; i++) { struct page *page = alloc_page(GFP_KERNEL); if (page == NULL) goto out_freepages; pages[i] = page; } - return 0; + return pages; out_freepages: nfs_readdir_free_pages(pages, i); - return -ENOMEM; + return NULL; } -static -int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, struct inode *inode) +static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc, + __be32 *verf_arg, __be32 *verf_res, + struct page **arrays, size_t narrays) { - struct page *pages[NFS_MAX_READDIR_PAGES]; - struct nfs_entry entry; - struct file *file = desc->file; - struct nfs_cache_array *array; + struct page **pages; + struct page *page = *arrays; + struct nfs_entry *entry; + size_t array_size; + struct inode *inode = file_inode(desc->file); + size_t dtsize = NFS_SERVER(inode)->dtsize; int status = -ENOMEM; - unsigned int array_size = ARRAY_SIZE(pages); - nfs_readdir_init_array(page); - - entry.prev_cookie = 0; - entry.cookie = desc->last_cookie; - entry.eof = 0; - entry.fh = nfs_alloc_fhandle(); - entry.fattr = nfs_alloc_fattr(); - entry.server = NFS_SERVER(inode); - if (entry.fh == NULL || entry.fattr == NULL) + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + entry->cookie = nfs_readdir_page_last_cookie(page); + entry->fh = nfs_alloc_fhandle(); + entry->fattr = nfs_alloc_fattr(); + entry->server = NFS_SERVER(inode); + if (entry->fh == NULL || entry->fattr == NULL) goto out; - entry.label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT); - if (IS_ERR(entry.label)) { - status = PTR_ERR(entry.label); + entry->label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT); + if (IS_ERR(entry->label)) { + status = PTR_ERR(entry->label); goto out; } - array = kmap(page); + array_size = (dtsize + PAGE_SIZE - 1) >> PAGE_SHIFT; + pages = nfs_readdir_alloc_pages(array_size); + if (!pages) + goto out_release_label; - status = nfs_readdir_alloc_pages(pages, array_size); - if (status < 0) - goto out_release_array; do { unsigned int pglen; - status = nfs_readdir_xdr_filler(pages, desc, &entry, file, inode); - + status = nfs_readdir_xdr_filler(desc, verf_arg, entry->cookie, + pages, dtsize, + verf_res); if (status < 0) break; + pglen = status; - status = nfs_readdir_page_filler(desc, &entry, pages, page, pglen); - if (status < 0) { - if (status == -ENOSPC) - status = 0; + if (pglen == 0) { + nfs_readdir_page_set_eof(page); break; } - } while (array->eof_index < 0); + + status = nfs_readdir_page_filler(desc, entry, pages, pglen, + arrays, narrays); + } while (!status && nfs_readdir_page_needs_filling(page)); nfs_readdir_free_pages(pages, array_size); -out_release_array: - kunmap(page); - nfs4_label_free(entry.label); +out_release_label: + nfs4_label_free(entry->label); out: - nfs_free_fattr(entry.fattr); - nfs_free_fhandle(entry.fh); + nfs_free_fattr(entry->fattr); + nfs_free_fhandle(entry->fh); + kfree(entry); return status; } -/* - * Now we cache directories properly, by converting xdr information - * to an array that can be used for lookups later. This results in - * fewer cache pages, since we can store more information on each page. - * We only need to convert from xdr once so future lookups are much simpler - */ -static -int nfs_readdir_filler(void *data, struct page* page) +static void nfs_readdir_page_put(struct nfs_readdir_descriptor *desc) { - nfs_readdir_descriptor_t *desc = data; - struct inode *inode = file_inode(desc->file); - int ret; - - ret = nfs_readdir_xdr_to_array(desc, page, inode); - if (ret < 0) - goto error; - SetPageUptodate(page); - - if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) { - /* Should never happen */ - nfs_zap_mapping(inode, inode->i_mapping); - } - unlock_page(page); - return 0; - error: - nfs_readdir_clear_array(page); - unlock_page(page); - return ret; + put_page(desc->page); + desc->page = NULL; } -static -void cache_page_release(nfs_readdir_descriptor_t *desc) +static void +nfs_readdir_page_unlock_and_put_cached(struct nfs_readdir_descriptor *desc) { - put_page(desc->page); - desc->page = NULL; + unlock_page(desc->page); + nfs_readdir_page_put(desc); } -static -struct page *get_cache_page(nfs_readdir_descriptor_t *desc) +static struct page * +nfs_readdir_page_get_cached(struct nfs_readdir_descriptor *desc) { - return read_cache_page(desc->file->f_mapping, desc->page_index, - nfs_readdir_filler, desc); + return nfs_readdir_page_get_locked(desc->file->f_mapping, + desc->page_index, + desc->last_cookie); } /* * Returns 0 if desc->dir_cookie was found on page desc->page_index * and locks the page to prevent removal from the page cache. */ -static -int find_and_lock_cache_page(nfs_readdir_descriptor_t *desc) +static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc) { struct inode *inode = file_inode(desc->file); struct nfs_inode *nfsi = NFS_I(inode); + __be32 verf[NFS_DIR_VERIFIER_SIZE]; int res; - desc->page = get_cache_page(desc); - if (IS_ERR(desc->page)) - return PTR_ERR(desc->page); - res = lock_page_killable(desc->page); - if (res != 0) - goto error; - res = -EAGAIN; - if (desc->page->mapping != NULL) { - res = nfs_readdir_search_array(desc); - if (res == 0) { - nfsi->page_index = desc->page_index; - return 0; + desc->page = nfs_readdir_page_get_cached(desc); + if (!desc->page) + return -ENOMEM; + if (nfs_readdir_page_needs_filling(desc->page)) { + res = nfs_readdir_xdr_to_array(desc, nfsi->cookieverf, verf, + &desc->page, 1); + if (res < 0) { + nfs_readdir_page_unlock_and_put_cached(desc); + if (res == -EBADCOOKIE || res == -ENOTSYNC) { + invalidate_inode_pages2(desc->file->f_mapping); + desc->page_index = 0; + return -EAGAIN; + } + return res; } + memcpy(nfsi->cookieverf, verf, sizeof(nfsi->cookieverf)); } - unlock_page(desc->page); -error: - cache_page_release(desc); + res = nfs_readdir_search_array(desc); + if (res == 0) { + nfsi->page_index = desc->page_index; + return 0; + } + nfs_readdir_page_unlock_and_put_cached(desc); return res; } +static bool nfs_readdir_dont_search_cache(struct nfs_readdir_descriptor *desc) +{ + struct address_space *mapping = desc->file->f_mapping; + struct inode *dir = file_inode(desc->file); + unsigned int dtsize = NFS_SERVER(dir)->dtsize; + loff_t size = i_size_read(dir); + + /* + * Default to uncached readdir if the page cache is empty, and + * we're looking for a non-zero cookie in a large directory. + */ + return desc->dir_cookie != 0 && mapping->nrpages == 0 && size > dtsize; +} + /* Search for desc->dir_cookie from the beginning of the page cache */ -static inline -int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) +static int readdir_search_pagecache(struct nfs_readdir_descriptor *desc) { int res; - if (desc->page_index == 0) { - desc->current_index = 0; - desc->prev_index = 0; - desc->last_cookie = 0; - } + if (nfs_readdir_dont_search_cache(desc)) + return -EBADCOOKIE; + do { + if (desc->page_index == 0) { + desc->current_index = 0; + desc->prev_index = 0; + desc->last_cookie = 0; + } res = find_and_lock_cache_page(desc); } while (res == -EAGAIN); return res; @@ -794,43 +973,41 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) /* * Once we've found the start of the dirent within a page: fill 'er up... */ -static -int nfs_do_filldir(nfs_readdir_descriptor_t *desc) +static void nfs_do_filldir(struct nfs_readdir_descriptor *desc) { struct file *file = desc->file; - int i = 0; - int res = 0; - struct nfs_cache_array *array = NULL; - struct nfs_open_dir_context *ctx = file->private_data; + struct nfs_inode *nfsi = NFS_I(file_inode(file)); + struct nfs_cache_array *array; + unsigned int i = 0; array = kmap(desc->page); for (i = desc->cache_entry_index; i < array->size; i++) { struct nfs_cache_array_entry *ent; ent = &array->array[i]; - if (!dir_emit(desc->ctx, ent->string.name, ent->string.len, + if (!dir_emit(desc->ctx, ent->name, ent->name_len, nfs_compat_user_ino64(ent->ino), ent->d_type)) { desc->eof = true; break; } + memcpy(desc->verf, nfsi->cookieverf, sizeof(desc->verf)); if (i < (array->size-1)) - *desc->dir_cookie = array->array[i+1].cookie; + desc->dir_cookie = array->array[i+1].cookie; else - *desc->dir_cookie = array->last_cookie; + desc->dir_cookie = array->last_cookie; if (nfs_readdir_use_cookie(file)) - desc->ctx->pos = *desc->dir_cookie; + desc->ctx->pos = desc->dir_cookie; else desc->ctx->pos++; - if (ctx->duped != 0) - ctx->duped = 1; + if (desc->duped != 0) + desc->duped = 1; } - if (array->eof_index >= 0) + if (array->page_is_eof) desc->eof = true; kunmap(desc->page); - dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", - (unsigned long long)*desc->dir_cookie, res); - return res; + dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %llu\n", + (unsigned long long)desc->dir_cookie); } /* @@ -845,40 +1022,41 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc) * we should already have a complete representation of the * directory in the page cache by the time we get here. */ -static inline -int uncached_readdir(nfs_readdir_descriptor_t *desc) +static int uncached_readdir(struct nfs_readdir_descriptor *desc) { - struct page *page = NULL; - int status; - struct inode *inode = file_inode(desc->file); - struct nfs_open_dir_context *ctx = desc->file->private_data; + struct page **arrays; + size_t i, sz = 512; + __be32 verf[NFS_DIR_VERIFIER_SIZE]; + int status = -ENOMEM; - dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", - (unsigned long long)*desc->dir_cookie); + dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %llu\n", + (unsigned long long)desc->dir_cookie); - page = alloc_page(GFP_HIGHUSER); - if (!page) { - status = -ENOMEM; + arrays = kcalloc(sz, sizeof(*arrays), GFP_KERNEL); + if (!arrays) + goto out; + arrays[0] = nfs_readdir_page_array_alloc(desc->dir_cookie, GFP_KERNEL); + if (!arrays[0]) goto out; - } desc->page_index = 0; - desc->last_cookie = *desc->dir_cookie; - desc->page = page; - ctx->duped = 0; - - status = nfs_readdir_xdr_to_array(desc, page, inode); - if (status < 0) - goto out_release; - - status = nfs_do_filldir(desc); - - out_release: - nfs_readdir_clear_array(desc->page); - cache_page_release(desc); - out: - dfprintk(DIRCACHE, "NFS: %s: returns %d\n", - __func__, status); + desc->last_cookie = desc->dir_cookie; + desc->duped = 0; + + status = nfs_readdir_xdr_to_array(desc, desc->verf, verf, arrays, sz); + + for (i = 0; !desc->eof && i < sz && arrays[i]; i++) { + desc->page = arrays[i]; + nfs_do_filldir(desc); + } + desc->page = NULL; + + + for (i = 0; i < sz && arrays[i]; i++) + nfs_readdir_page_array_free(arrays[i]); +out: + kfree(arrays); + dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status); return status; } @@ -891,14 +1069,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) struct dentry *dentry = file_dentry(file); struct inode *inode = d_inode(dentry); struct nfs_open_dir_context *dir_ctx = file->private_data; - nfs_readdir_descriptor_t my_desc = { - .file = file, - .ctx = ctx, - .dir_cookie = &dir_ctx->dir_cookie, - .plus = nfs_use_readdirplus(inode, ctx), - }, - *desc = &my_desc; - int res = 0; + struct nfs_readdir_descriptor *desc; + int res; dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n", file, (long long)ctx->pos); @@ -910,10 +1082,27 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) * to either find the entry with the appropriate number or * revalidate the cookie. */ - if (ctx->pos == 0 || nfs_attribute_cache_expired(inode)) + if (ctx->pos == 0 || nfs_attribute_cache_expired(inode)) { res = nfs_revalidate_mapping(inode, file->f_mapping); - if (res < 0) + if (res < 0) + goto out; + } + + res = -ENOMEM; + desc = kzalloc(sizeof(*desc), GFP_KERNEL); + if (!desc) goto out; + desc->file = file; + desc->ctx = ctx; + desc->plus = nfs_use_readdirplus(inode, ctx); + + spin_lock(&file->f_lock); + desc->dir_cookie = dir_ctx->dir_cookie; + desc->dup_cookie = dir_ctx->dup_cookie; + desc->duped = dir_ctx->duped; + desc->attr_gencount = dir_ctx->attr_gencount; + memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf)); + spin_unlock(&file->f_lock); do { res = readdir_search_pagecache(desc); @@ -921,11 +1110,13 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) if (res == -EBADCOOKIE) { res = 0; /* This means either end of directory */ - if (*desc->dir_cookie && !desc->eof) { + if (desc->dir_cookie && !desc->eof) { /* Or that the server has 'lost' a cookie */ res = uncached_readdir(desc); if (res == 0) continue; + if (res == -EBADCOOKIE || res == -ENOTSYNC) + res = 0; } break; } @@ -940,22 +1131,27 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) if (res < 0) break; - res = nfs_do_filldir(desc); - unlock_page(desc->page); - cache_page_release(desc); - if (res < 0) - break; + nfs_do_filldir(desc); + nfs_readdir_page_unlock_and_put_cached(desc); } while (!desc->eof); + + spin_lock(&file->f_lock); + dir_ctx->dir_cookie = desc->dir_cookie; + dir_ctx->dup_cookie = desc->dup_cookie; + dir_ctx->duped = desc->duped; + dir_ctx->attr_gencount = desc->attr_gencount; + memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf)); + spin_unlock(&file->f_lock); + + kfree(desc); + out: - if (res > 0) - res = 0; dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res); return res; } static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence) { - struct inode *inode = file_inode(filp); struct nfs_open_dir_context *dir_ctx = filp->private_data; dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n", @@ -967,15 +1163,15 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence) case SEEK_SET: if (offset < 0) return -EINVAL; - inode_lock(inode); + spin_lock(&filp->f_lock); break; case SEEK_CUR: if (offset == 0) return filp->f_pos; - inode_lock(inode); + spin_lock(&filp->f_lock); offset += filp->f_pos; if (offset < 0) { - inode_unlock(inode); + spin_unlock(&filp->f_lock); return -EINVAL; } } @@ -985,9 +1181,11 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence) dir_ctx->dir_cookie = offset; else dir_ctx->dir_cookie = 0; + if (offset == 0) + memset(dir_ctx->verf, 0, sizeof(dir_ctx->verf)); dir_ctx->duped = 0; } - inode_unlock(inode); + spin_unlock(&filp->f_lock); return offset; } @@ -998,13 +1196,9 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence) static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end, int datasync) { - struct inode *inode = file_inode(filp); - dfprintk(FILE, "NFS: fsync dir(%pD2) datasync %d\n", filp, datasync); - inode_lock(inode); - nfs_inc_stats(inode, NFSIOS_VFSFSYNC); - inode_unlock(inode); + nfs_inc_stats(file_inode(filp), NFSIOS_VFSFSYNC); return 0; } @@ -1901,8 +2095,8 @@ EXPORT_SYMBOL_GPL(nfs_instantiate); * that the operation succeeded on the server, but an error in the * reply path made it appear to have failed. */ -int nfs_create(struct inode *dir, struct dentry *dentry, - umode_t mode, bool excl) +int nfs_create(struct user_namespace *mnt_userns, struct inode *dir, + struct dentry *dentry, umode_t mode, bool excl) { struct iattr attr; int open_flags = excl ? O_CREAT | O_EXCL : O_CREAT; @@ -1930,7 +2124,8 @@ EXPORT_SYMBOL_GPL(nfs_create); * See comments for nfs_proc_create regarding failed operations. */ int -nfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) +nfs_mknod(struct user_namespace *mnt_userns, struct inode *dir, + struct dentry *dentry, umode_t mode, dev_t rdev) { struct iattr attr; int status; @@ -1956,7 +2151,8 @@ EXPORT_SYMBOL_GPL(nfs_mknod); /* * See comments for nfs_proc_create regarding failed operations. */ -int nfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +int nfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, + struct dentry *dentry, umode_t mode) { struct iattr attr; int error; @@ -2101,7 +2297,8 @@ EXPORT_SYMBOL_GPL(nfs_unlink); * now have a new file handle and can instantiate an in-core NFS inode * and move the raw page into its mapping. */ -int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) +int nfs_symlink(struct user_namespace *mnt_userns, struct inode *dir, + struct dentry *dentry, const char *symname) { struct page *page; char *kaddr; @@ -2204,9 +2401,9 @@ EXPORT_SYMBOL_GPL(nfs_link); * If these conditions are met, we can drop the dentries before doing * the rename. */ -int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry, - unsigned int flags) +int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, + struct dentry *old_dentry, struct inode *new_dir, + struct dentry *new_dentry, unsigned int flags) { struct inode *old_inode = d_inode(old_dentry); struct inode *new_inode = d_inode(new_dentry); @@ -2745,7 +2942,9 @@ static int nfs_execute_ok(struct inode *inode, int mask) return ret; } -int nfs_permission(struct inode *inode, int mask) +int nfs_permission(struct user_namespace *mnt_userns, + struct inode *inode, + int mask) { const struct cred *cred = current_cred(); int res = 0; @@ -2793,7 +2992,7 @@ out_notsup: res = nfs_revalidate_inode(NFS_SERVER(inode), inode); if (res == 0) - res = generic_permission(inode, mask); + res = generic_permission(&init_user_ns, inode, mask); goto out; } EXPORT_SYMBOL_GPL(nfs_permission); diff --git a/fs/nfs/export.c b/fs/nfs/export.c index 3430d6891e89..f2b34cfe286c 100644 --- a/fs/nfs/export.c +++ b/fs/nfs/export.c @@ -167,8 +167,29 @@ out: return parent; } +static u64 nfs_fetch_iversion(struct inode *inode) +{ + struct nfs_server *server = NFS_SERVER(inode); + + /* Is this the right call?: */ + nfs_revalidate_inode(server, inode); + /* + * Also, note we're ignoring any returned error. That seems to be + * the practice for cache consistency information elsewhere in + * the server, but I'm not sure why. + */ + if (server->nfs_client->rpc_ops->version >= 4) + return inode_peek_iversion_raw(inode); + else + return time_to_chattr(&inode->i_ctime); +} + const struct export_operations nfs_export_ops = { .encode_fh = nfs_encode_fh, .fh_to_dentry = nfs_fh_to_dentry, .get_parent = nfs_get_parent, + .fetch_iversion = nfs_fetch_iversion, + .flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK| + EXPORT_OP_CLOSE_BEFORE_UNLINK|EXPORT_OP_REMOTE_FS| + EXPORT_OP_NOATOMIC_ATTR, }; diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 7f5aa0403e16..d158a500c25c 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -666,7 +666,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, return -ENOMEM; xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len); - xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); + xdr_set_scratch_page(&stream, scratch); /* 20 = ufl_util (4), first_stripe_index (4), pattern_offset (8), * num_fh (4) */ diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index d913e818858f..86c3f7e69ec4 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -82,7 +82,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, goto out_err; xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen); - xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); + xdr_set_scratch_page(&stream, scratch); /* Get the stripe count (number of stripe index) */ p = xdr_inline_decode(&stream, 4); diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index a163533446fa..872112bffcab 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -378,7 +378,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len); - xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); + xdr_set_scratch_page(&stream, scratch); /* stripe unit and mirror_array_cnt */ rc = -EIO; @@ -740,16 +740,12 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg, struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg); struct nfs4_ff_layout_mirror *mirror; struct nfs4_pnfs_ds *ds; - bool fail_return = false; u32 idx; /* mirrors are initially sorted by efficiency */ for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) { - if (idx+1 == fls->mirror_array_cnt) - fail_return = !check_device; - mirror = FF_LAYOUT_COMP(lseg, idx); - ds = nfs4_ff_layout_prepare_ds(lseg, mirror, fail_return); + ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false); if (!ds) continue; @@ -838,7 +834,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_pgio_mirror *pgm; struct nfs4_ff_layout_mirror *mirror; struct nfs4_pnfs_ds *ds; - u32 ds_idx, i; + u32 ds_idx; retry: ff_layout_pg_check_layout(pgio, req); @@ -864,11 +860,9 @@ retry: goto retry; } - for (i = 0; i < pgio->pg_mirror_count; i++) { - mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i); - pgm = &pgio->pg_mirrors[i]; - pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize; - } + mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx); + pgm = &pgio->pg_mirrors[0]; + pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize; pgio->pg_mirror_idx = ds_idx; @@ -985,6 +979,21 @@ out: return 1; } +static u32 +ff_layout_pg_set_mirror_write(struct nfs_pageio_descriptor *desc, u32 idx) +{ + u32 old = desc->pg_mirror_idx; + + desc->pg_mirror_idx = idx; + return old; +} + +static struct nfs_pgio_mirror * +ff_layout_pg_get_mirror_write(struct nfs_pageio_descriptor *desc, u32 idx) +{ + return &desc->pg_mirrors[idx]; +} + static const struct nfs_pageio_ops ff_layout_pg_read_ops = { .pg_init = ff_layout_pg_init_read, .pg_test = pnfs_generic_pg_test, @@ -998,6 +1007,8 @@ static const struct nfs_pageio_ops ff_layout_pg_write_ops = { .pg_doio = pnfs_generic_pg_writepages, .pg_get_mirror_count = ff_layout_pg_get_mirror_count_write, .pg_cleanup = pnfs_generic_pg_cleanup, + .pg_get_mirror = ff_layout_pg_get_mirror_write, + .pg_set_mirror = ff_layout_pg_set_mirror_write, }; static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs) @@ -1041,7 +1052,7 @@ static void ff_layout_resend_pnfs_read(struct nfs_pgio_header *hdr) u32 idx = hdr->pgio_mirror_idx + 1; u32 new_idx = 0; - if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx + 1, &new_idx)) + if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx)) ff_layout_send_layouterror(hdr->lseg); else pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg); @@ -2269,7 +2280,6 @@ ff_layout_encode_netaddr(struct xdr_stream *xdr, struct nfs4_pnfs_ds_addr *da) struct sockaddr *sap = (struct sockaddr *)&da->da_addr; char portbuf[RPCBIND_MAXUADDRPLEN]; char addrbuf[RPCBIND_MAXUADDRLEN]; - char *netid; unsigned short port; int len, netid_len; __be32 *p; @@ -2279,18 +2289,13 @@ ff_layout_encode_netaddr(struct xdr_stream *xdr, struct nfs4_pnfs_ds_addr *da) if (ff_layout_ntop4(sap, addrbuf, sizeof(addrbuf)) == 0) return; port = ntohs(((struct sockaddr_in *)sap)->sin_port); - netid = "tcp"; - netid_len = 3; break; case AF_INET6: if (ff_layout_ntop6_noscopeid(sap, addrbuf, sizeof(addrbuf)) == 0) return; port = ntohs(((struct sockaddr_in6 *)sap)->sin6_port); - netid = "tcp6"; - netid_len = 4; break; default: - /* we only support tcp and tcp6 */ WARN_ON_ONCE(1); return; } @@ -2298,8 +2303,9 @@ ff_layout_encode_netaddr(struct xdr_stream *xdr, struct nfs4_pnfs_ds_addr *da) snprintf(portbuf, sizeof(portbuf), ".%u.%u", port >> 8, port & 0xff); len = strlcat(addrbuf, portbuf, sizeof(addrbuf)); + netid_len = strlen(da->da_netid); p = xdr_reserve_space(xdr, 4 + netid_len); - xdr_encode_opaque(p, netid, netid_len); + xdr_encode_opaque(p, da->da_netid, netid_len); p = xdr_reserve_space(xdr, 4 + len); xdr_encode_opaque(p, addrbuf, len); diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 3eda40a320a5..c9b61b818ec1 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -69,7 +69,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, INIT_LIST_HEAD(&dsaddrs); xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen); - xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); + xdr_set_scratch_page(&stream, scratch); /* multipath count */ p = xdr_inline_decode(&stream, 4); diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index 29ec8b09a52d..06894bcdea2d 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -510,13 +510,12 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP; break; case Opt_tcp: - ctx->flags |= NFS_MOUNT_TCP; - ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP; - break; case Opt_rdma: ctx->flags |= NFS_MOUNT_TCP; /* for side protocols */ - ctx->nfs_server.protocol = XPRT_TRANSPORT_RDMA; - xprt_load_transport(param->key); + ret = xprt_find_transport_ident(param->key); + if (ret < 0) + goto out_bad_transport; + ctx->nfs_server.protocol = ret; break; case Opt_acl: if (result.negated) @@ -670,11 +669,13 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, case Opt_xprt_rdma: /* vector side protocols to TCP */ ctx->flags |= NFS_MOUNT_TCP; - ctx->nfs_server.protocol = XPRT_TRANSPORT_RDMA; - xprt_load_transport(param->string); + ret = xprt_find_transport_ident(param->string); + if (ret < 0) + goto out_bad_transport; + ctx->nfs_server.protocol = ret; break; default: - return nfs_invalf(fc, "NFS: Unrecognized transport protocol"); + goto out_bad_transport; } ctx->protofamily = protofamily; @@ -697,7 +698,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, break; case Opt_xprt_rdma: /* not used for side protocols */ default: - return nfs_invalf(fc, "NFS: Unrecognized transport protocol"); + goto out_bad_transport; } ctx->mountfamily = mountfamily; break; @@ -787,6 +788,8 @@ out_invalid_address: return nfs_invalf(fc, "NFS: Bad IP address specified"); out_of_bounds: return nfs_invalf(fc, "NFS: Value for '%s' out of range", param->key); +out_bad_transport: + return nfs_invalf(fc, "NFS: Unrecognized transport protocol"); } /* diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index aa6493905bbe..447e95974386 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -229,7 +229,6 @@ static void nfs_zap_caches_locked(struct inode *inode) nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = jiffies; - memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf)); if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR | NFS_INO_INVALID_DATA @@ -595,7 +594,8 @@ EXPORT_SYMBOL_GPL(nfs_fhget); #define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE|ATTR_OPEN) int -nfs_setattr(struct dentry *dentry, struct iattr *attr) +nfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, + struct iattr *attr) { struct inode *inode = d_inode(dentry); struct nfs_fattr *fattr; @@ -788,8 +788,8 @@ static bool nfs_need_revalidate_inode(struct inode *inode) return false; } -int nfs_getattr(const struct path *path, struct kstat *stat, - u32 request_mask, unsigned int query_flags) +int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path, + struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); struct nfs_server *server = NFS_SERVER(inode); @@ -858,7 +858,7 @@ out_no_revalidate: /* Only return attributes that were revalidated. */ stat->result_mask &= request_mask; out_no_update: - generic_fillattr(inode, stat); + generic_fillattr(&init_user_ns, inode, stat); stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); if (S_ISDIR(inode->i_mode)) stat->blksize = NFS_SERVER(inode)->dtsize; @@ -1237,7 +1237,6 @@ EXPORT_SYMBOL_GPL(nfs_revalidate_inode); static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) { - struct nfs_inode *nfsi = NFS_I(inode); int ret; if (mapping->nrpages != 0) { @@ -1250,11 +1249,6 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map if (ret < 0) return ret; } - if (S_ISDIR(inode->i_mode)) { - spin_lock(&inode->i_lock); - memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); - spin_unlock(&inode->i_lock); - } nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); nfs_fscache_wait_on_invalidate(inode); @@ -2180,7 +2174,7 @@ static int nfsiod_start(void) { struct workqueue_struct *wq; dprintk("RPC: creating workqueue nfsiod\n"); - wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM, 0); + wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0); if (wq == NULL) return -ENOMEM; nfsiod_workqueue = wq; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 6673a77884d9..25fb43b69e5a 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -56,12 +56,6 @@ static inline bool nfs_lookup_is_soft_revalidate(const struct dentry *dentry) #define NFS_UNSPEC_RETRANS (UINT_MAX) #define NFS_UNSPEC_TIMEO (UINT_MAX) -/* - * Maximum number of pages that readdir can use for creating - * a vmapped array of pages. - */ -#define NFS_MAX_READDIR_PAGES 8 - struct nfs_client_initdata { unsigned long init_flags; const char *hostname; /* Hostname of the server */ @@ -142,9 +136,29 @@ struct nfs_fs_context { } clone_data; }; -#define nfs_errorf(fc, fmt, ...) errorf(fc, fmt, ## __VA_ARGS__) -#define nfs_invalf(fc, fmt, ...) invalf(fc, fmt, ## __VA_ARGS__) -#define nfs_warnf(fc, fmt, ...) warnf(fc, fmt, ## __VA_ARGS__) +#define nfs_errorf(fc, fmt, ...) ((fc)->log.log ? \ + errorf(fc, fmt, ## __VA_ARGS__) : \ + ({ dprintk(fmt "\n", ## __VA_ARGS__); })) + +#define nfs_ferrorf(fc, fac, fmt, ...) ((fc)->log.log ? \ + errorf(fc, fmt, ## __VA_ARGS__) : \ + ({ dfprintk(fac, fmt "\n", ## __VA_ARGS__); })) + +#define nfs_invalf(fc, fmt, ...) ((fc)->log.log ? \ + invalf(fc, fmt, ## __VA_ARGS__) : \ + ({ dprintk(fmt "\n", ## __VA_ARGS__); -EINVAL; })) + +#define nfs_finvalf(fc, fac, fmt, ...) ((fc)->log.log ? \ + invalf(fc, fmt, ## __VA_ARGS__) : \ + ({ dfprintk(fac, fmt "\n", ## __VA_ARGS__); -EINVAL; })) + +#define nfs_warnf(fc, fmt, ...) ((fc)->log.log ? \ + warnf(fc, fmt, ## __VA_ARGS__) : \ + ({ dprintk(fmt "\n", ## __VA_ARGS__); })) + +#define nfs_fwarnf(fc, fac, fmt, ...) ((fc)->log.log ? \ + warnf(fc, fmt, ## __VA_ARGS__) : \ + ({ dfprintk(fac, fmt "\n", ## __VA_ARGS__); })) static inline struct nfs_fs_context *nfs_fc2context(const struct fs_context *fc) { @@ -364,14 +378,18 @@ extern unsigned long nfs_access_cache_count(struct shrinker *shrink, extern unsigned long nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc); struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int); -int nfs_create(struct inode *, struct dentry *, umode_t, bool); -int nfs_mkdir(struct inode *, struct dentry *, umode_t); +int nfs_create(struct user_namespace *, struct inode *, struct dentry *, + umode_t, bool); +int nfs_mkdir(struct user_namespace *, struct inode *, struct dentry *, + umode_t); int nfs_rmdir(struct inode *, struct dentry *); int nfs_unlink(struct inode *, struct dentry *); -int nfs_symlink(struct inode *, struct dentry *, const char *); +int nfs_symlink(struct user_namespace *, struct inode *, struct dentry *, + const char *); int nfs_link(struct dentry *, struct inode *, struct dentry *); -int nfs_mknod(struct inode *, struct dentry *, umode_t, dev_t); -int nfs_rename(struct inode *, struct dentry *, +int nfs_mknod(struct user_namespace *, struct inode *, struct dentry *, umode_t, + dev_t); +int nfs_rename(struct user_namespace *, struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); /* file.c */ @@ -585,12 +603,14 @@ extern void nfs4_test_session_trunk(struct rpc_clnt *clnt, static inline struct inode *nfs_igrab_and_active(struct inode *inode) { - inode = igrab(inode); - if (inode != NULL && !nfs_sb_active(inode->i_sb)) { - iput(inode); - inode = NULL; + struct super_block *sb = inode->i_sb; + + if (sb && nfs_sb_active(sb)) { + if (igrab(inode)) + return inode; + nfs_sb_deactive(sb); } - return inode; + return NULL; } static inline void nfs_iput_and_deactive(struct inode *inode) diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 2bcbe38afe2e..93e60e921f92 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -208,20 +208,23 @@ out_fc: } static int -nfs_namespace_getattr(const struct path *path, struct kstat *stat, - u32 request_mask, unsigned int query_flags) +nfs_namespace_getattr(struct user_namespace *mnt_userns, + const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { if (NFS_FH(d_inode(path->dentry))->size != 0) - return nfs_getattr(path, stat, request_mask, query_flags); - generic_fillattr(d_inode(path->dentry), stat); + return nfs_getattr(mnt_userns, path, stat, request_mask, + query_flags); + generic_fillattr(&init_user_ns, d_inode(path->dentry), stat); return 0; } static int -nfs_namespace_setattr(struct dentry *dentry, struct iattr *attr) +nfs_namespace_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, + struct iattr *attr) { if (NFS_FH(d_inode(dentry))->size != 0) - return nfs_setattr(dentry, attr); + return nfs_setattr(mnt_userns, dentry, attr); return -EACCES; } diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index f6676af37d5d..7fba7711e6b3 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -34,6 +34,7 @@ * Declare the space requirements for NFS arguments and replies as * number of 32bit-words */ +#define NFS_pagepad_sz (1) /* Page padding */ #define NFS_fhandle_sz (8) #define NFS_sattr_sz (8) #define NFS_filename_sz (1+(NFS2_MAXNAMLEN>>2)) @@ -56,11 +57,11 @@ #define NFS_attrstat_sz (1+NFS_fattr_sz) #define NFS_diropres_sz (1+NFS_fhandle_sz+NFS_fattr_sz) -#define NFS_readlinkres_sz (2+1) -#define NFS_readres_sz (1+NFS_fattr_sz+1+1) +#define NFS_readlinkres_sz (2+NFS_pagepad_sz) +#define NFS_readres_sz (1+NFS_fattr_sz+1+NFS_pagepad_sz) #define NFS_writeres_sz (NFS_attrstat_sz) #define NFS_stat_sz (1) -#define NFS_readdirres_sz (1+1) +#define NFS_readdirres_sz (1+NFS_pagepad_sz) #define NFS_statfsres_sz (1+NFS_info_sz) static int nfs_stat_to_errno(enum nfs_stat); @@ -592,8 +593,8 @@ static void nfs2_xdr_enc_readlinkargs(struct rpc_rqst *req, const struct nfs_readlinkargs *args = data; encode_fhandle(xdr, args->fh); - rpc_prepare_reply_pages(req, args->pages, args->pgbase, - args->pglen, NFS_readlinkres_sz); + rpc_prepare_reply_pages(req, args->pages, args->pgbase, args->pglen, + NFS_readlinkres_sz - NFS_pagepad_sz); } /* @@ -628,8 +629,8 @@ static void nfs2_xdr_enc_readargs(struct rpc_rqst *req, const struct nfs_pgio_args *args = data; encode_readargs(xdr, args); - rpc_prepare_reply_pages(req, args->pages, args->pgbase, - args->count, NFS_readres_sz); + rpc_prepare_reply_pages(req, args->pages, args->pgbase, args->count, + NFS_readres_sz - NFS_pagepad_sz); req->rq_rcv_buf.flags |= XDRBUF_READ; } @@ -786,8 +787,8 @@ static void nfs2_xdr_enc_readdirargs(struct rpc_rqst *req, const struct nfs_readdirargs *args = data; encode_readdirargs(xdr, args); - rpc_prepare_reply_pages(req, args->pages, 0, - args->count, NFS_readdirres_sz); + rpc_prepare_reply_pages(req, args->pages, 0, args->count, + NFS_readdirres_sz - NFS_pagepad_sz); } /* diff --git a/fs/nfs/nfs3_fs.h b/fs/nfs/nfs3_fs.h index 1b950b66b3bb..c8a192802dda 100644 --- a/fs/nfs/nfs3_fs.h +++ b/fs/nfs/nfs3_fs.h @@ -12,7 +12,8 @@ */ #ifdef CONFIG_NFS_V3_ACL extern struct posix_acl *nfs3_get_acl(struct inode *inode, int type); -extern int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type); +extern int nfs3_set_acl(struct user_namespace *mnt_userns, struct inode *inode, + struct posix_acl *acl, int type); extern int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, struct posix_acl *dfacl); extern ssize_t nfs3_listxattr(struct dentry *, char *, size_t); diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index c6c863382f37..5604e807fc01 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -251,7 +251,8 @@ int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, } -int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type) +int nfs3_set_acl(struct user_namespace *mnt_userns, struct inode *inode, + struct posix_acl *acl, int type) { struct posix_acl *orig = acl, *dfacl = NULL, *alloc; int status; diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 2397ceedba8a..5c4e23abc345 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -154,14 +154,14 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, } static int -nfs3_proc_lookup(struct inode *dir, struct dentry *dentry, - struct nfs_fh *fhandle, struct nfs_fattr *fattr, - struct nfs4_label *label) +__nfs3_proc_lookup(struct inode *dir, const char *name, size_t len, + struct nfs_fh *fhandle, struct nfs_fattr *fattr, + unsigned short task_flags) { struct nfs3_diropargs arg = { .fh = NFS_FH(dir), - .name = dentry->d_name.name, - .len = dentry->d_name.len + .name = name, + .len = len }; struct nfs3_diropres res = { .fh = fhandle, @@ -173,17 +173,11 @@ nfs3_proc_lookup(struct inode *dir, struct dentry *dentry, .rpc_resp = &res, }; int status; - unsigned short task_flags = 0; - - /* Is this is an attribute revalidation, subject to softreval? */ - if (nfs_lookup_is_soft_revalidate(dentry)) - task_flags |= RPC_TASK_TIMEOUT; res.dir_attr = nfs_alloc_fattr(); if (res.dir_attr == NULL) return -ENOMEM; - dprintk("NFS call lookup %pd2\n", dentry); nfs_fattr_init(fattr); status = rpc_call_sync(NFS_CLIENT(dir), &msg, task_flags); nfs_refresh_inode(dir, res.dir_attr); @@ -198,6 +192,37 @@ nfs3_proc_lookup(struct inode *dir, struct dentry *dentry, return status; } +static int +nfs3_proc_lookup(struct inode *dir, struct dentry *dentry, + struct nfs_fh *fhandle, struct nfs_fattr *fattr, + struct nfs4_label *label) +{ + unsigned short task_flags = 0; + + /* Is this is an attribute revalidation, subject to softreval? */ + if (nfs_lookup_is_soft_revalidate(dentry)) + task_flags |= RPC_TASK_TIMEOUT; + + dprintk("NFS call lookup %pd2\n", dentry); + return __nfs3_proc_lookup(dir, dentry->d_name.name, + dentry->d_name.len, fhandle, fattr, + task_flags); +} + +static int nfs3_proc_lookupp(struct inode *inode, struct nfs_fh *fhandle, + struct nfs_fattr *fattr, struct nfs4_label *label) +{ + const char dotdot[] = ".."; + const size_t len = strlen(dotdot); + unsigned short task_flags = 0; + + if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL) + task_flags |= RPC_TASK_TIMEOUT; + + return __nfs3_proc_lookup(inode, dotdot, len, fhandle, fattr, + task_flags); +} + static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry) { struct nfs3_accessargs arg = { @@ -637,37 +662,36 @@ out: * Also note that this implementation handles both plain readdir and * readdirplus. */ -static int -nfs3_proc_readdir(struct dentry *dentry, const struct cred *cred, - u64 cookie, struct page **pages, unsigned int count, bool plus) +static int nfs3_proc_readdir(struct nfs_readdir_arg *nr_arg, + struct nfs_readdir_res *nr_res) { - struct inode *dir = d_inode(dentry); - __be32 *verf = NFS_I(dir)->cookieverf; + struct inode *dir = d_inode(nr_arg->dentry); struct nfs3_readdirargs arg = { .fh = NFS_FH(dir), - .cookie = cookie, - .verf = {verf[0], verf[1]}, - .plus = plus, - .count = count, - .pages = pages + .cookie = nr_arg->cookie, + .plus = nr_arg->plus, + .count = nr_arg->page_len, + .pages = nr_arg->pages }; struct nfs3_readdirres res = { - .verf = verf, - .plus = plus + .verf = nr_res->verf, + .plus = nr_arg->plus, }; struct rpc_message msg = { .rpc_proc = &nfs3_procedures[NFS3PROC_READDIR], .rpc_argp = &arg, .rpc_resp = &res, - .rpc_cred = cred, + .rpc_cred = nr_arg->cred, }; int status = -ENOMEM; - if (plus) + if (nr_arg->plus) msg.rpc_proc = &nfs3_procedures[NFS3PROC_READDIRPLUS]; + if (arg.cookie) + memcpy(arg.verf, nr_arg->verf, sizeof(arg.verf)); - dprintk("NFS call readdir%s %d\n", - plus? "plus" : "", (unsigned int) cookie); + dprintk("NFS call readdir%s %llu\n", nr_arg->plus ? "plus" : "", + (unsigned long long)nr_arg->cookie); res.dir_attr = nfs_alloc_fattr(); if (res.dir_attr == NULL) @@ -680,8 +704,8 @@ nfs3_proc_readdir(struct dentry *dentry, const struct cred *cred, nfs_free_fattr(res.dir_attr); out: - dprintk("NFS reply readdir%s: %d\n", - plus? "plus" : "", status); + dprintk("NFS reply readdir%s: %d\n", nr_arg->plus ? "plus" : "", + status); return status; } @@ -1004,6 +1028,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .getattr = nfs3_proc_getattr, .setattr = nfs3_proc_setattr, .lookup = nfs3_proc_lookup, + .lookupp = nfs3_proc_lookupp, .access = nfs3_proc_access, .readlink = nfs3_proc_readlink, .create = nfs3_proc_create, diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 69971f6c840d..ca10072644ff 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -33,6 +33,7 @@ * Declare the space requirements for NFS arguments and replies as * number of 32bit-words */ +#define NFS3_pagepad_sz (1) /* Page padding */ #define NFS3_fhandle_sz (1+16) #define NFS3_fh_sz (NFS3_fhandle_sz) /* shorthand */ #define NFS3_sattr_sz (15) @@ -69,13 +70,13 @@ #define NFS3_removeres_sz (NFS3_setattrres_sz) #define NFS3_lookupres_sz (1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz)) #define NFS3_accessres_sz (1+NFS3_post_op_attr_sz+1) -#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1+1) -#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3+1) +#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1+NFS3_pagepad_sz) +#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3+NFS3_pagepad_sz) #define NFS3_writeres_sz (1+NFS3_wcc_data_sz+4) #define NFS3_createres_sz (1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz) #define NFS3_renameres_sz (1+(2 * NFS3_wcc_data_sz)) #define NFS3_linkres_sz (1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz) -#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2+1) +#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2+NFS3_pagepad_sz) #define NFS3_fsstatres_sz (1+NFS3_post_op_attr_sz+13) #define NFS3_fsinfores_sz (1+NFS3_post_op_attr_sz+12) #define NFS3_pathconfres_sz (1+NFS3_post_op_attr_sz+6) @@ -85,7 +86,8 @@ #define ACL3_setaclargs_sz (NFS3_fh_sz+1+ \ XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)) #define ACL3_getaclres_sz (1+NFS3_post_op_attr_sz+1+ \ - XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)+1) + XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)+\ + NFS3_pagepad_sz) #define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz) static int nfs3_stat_to_errno(enum nfs_stat); @@ -909,8 +911,8 @@ static void nfs3_xdr_enc_readlink3args(struct rpc_rqst *req, const struct nfs3_readlinkargs *args = data; encode_nfs_fh3(xdr, args->fh); - rpc_prepare_reply_pages(req, args->pages, args->pgbase, - args->pglen, NFS3_readlinkres_sz); + rpc_prepare_reply_pages(req, args->pages, args->pgbase, args->pglen, + NFS3_readlinkres_sz - NFS3_pagepad_sz); } /* @@ -939,7 +941,8 @@ static void nfs3_xdr_enc_read3args(struct rpc_rqst *req, const void *data) { const struct nfs_pgio_args *args = data; - unsigned int replen = args->replen ? args->replen : NFS3_readres_sz; + unsigned int replen = args->replen ? args->replen : + NFS3_readres_sz - NFS3_pagepad_sz; encode_read3args(xdr, args); rpc_prepare_reply_pages(req, args->pages, args->pgbase, @@ -1239,8 +1242,8 @@ static void nfs3_xdr_enc_readdir3args(struct rpc_rqst *req, const struct nfs3_readdirargs *args = data; encode_readdir3args(xdr, args); - rpc_prepare_reply_pages(req, args->pages, 0, - args->count, NFS3_readdirres_sz); + rpc_prepare_reply_pages(req, args->pages, 0, args->count, + NFS3_readdirres_sz - NFS3_pagepad_sz); } /* @@ -1281,8 +1284,8 @@ static void nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req, const struct nfs3_readdirargs *args = data; encode_readdirplus3args(xdr, args); - rpc_prepare_reply_pages(req, args->pages, 0, - args->count, NFS3_readdirres_sz); + rpc_prepare_reply_pages(req, args->pages, 0, args->count, + NFS3_readdirres_sz - NFS3_pagepad_sz); } /* @@ -1328,7 +1331,7 @@ static void nfs3_xdr_enc_getacl3args(struct rpc_rqst *req, if (args->mask & (NFS_ACL | NFS_DFACL)) { rpc_prepare_reply_pages(req, args->pages, 0, NFSACL_MAXPAGES << PAGE_SHIFT, - ACL3_getaclres_sz); + ACL3_getaclres_sz - NFS3_pagepad_sz); req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES; } } @@ -1648,7 +1651,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr, result->op_status = status; if (status != NFS3_OK) goto out_status; - result->replen = 4 + ((xdr_stream_pos(xdr) - pos) >> 2); + result->replen = 3 + ((xdr_stream_pos(xdr) - pos) >> 2); error = decode_read3resok(xdr, result); out: return error; diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 2b2211d1234e..f3fd935620fc 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -1173,14 +1173,12 @@ static int _nfs42_proc_setxattr(struct inode *inode, const char *name, } static ssize_t _nfs42_proc_getxattr(struct inode *inode, const char *name, - void *buf, size_t buflen) + void *buf, size_t buflen, struct page **pages, + size_t plen) { struct nfs_server *server = NFS_SERVER(inode); - struct page *pages[NFS4XATTR_MAXPAGES] = {}; struct nfs42_getxattrargs arg = { .fh = NFS_FH(inode), - .xattr_pages = pages, - .xattr_len = buflen, .xattr_name = name, }; struct nfs42_getxattrres res; @@ -1189,7 +1187,10 @@ static ssize_t _nfs42_proc_getxattr(struct inode *inode, const char *name, .rpc_argp = &arg, .rpc_resp = &res, }; - int ret, np; + ssize_t ret; + + arg.xattr_len = plen; + arg.xattr_pages = pages; ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 0); @@ -1214,10 +1215,6 @@ static ssize_t _nfs42_proc_getxattr(struct inode *inode, const char *name, _copy_from_pages(buf, pages, 0, res.xattr_len); } - np = DIV_ROUND_UP(res.xattr_len, PAGE_SIZE); - while (--np >= 0) - __free_page(pages[np]); - return res.xattr_len; } @@ -1241,12 +1238,13 @@ static ssize_t _nfs42_proc_listxattrs(struct inode *inode, void *buf, .rpc_resp = &res, }; u32 xdrlen; - int ret, np; + int ret, np, i; + ret = -ENOMEM; res.scratch = alloc_page(GFP_KERNEL); if (!res.scratch) - return -ENOMEM; + goto out; xdrlen = nfs42_listxattr_xdrsize(buflen); if (xdrlen > server->lxasize) @@ -1254,9 +1252,12 @@ static ssize_t _nfs42_proc_listxattrs(struct inode *inode, void *buf, np = xdrlen / PAGE_SIZE + 1; pages = kcalloc(np, sizeof(struct page *), GFP_KERNEL); - if (pages == NULL) { - __free_page(res.scratch); - return -ENOMEM; + if (!pages) + goto out_free_scratch; + for (i = 0; i < np; i++) { + pages[i] = alloc_page(GFP_KERNEL); + if (!pages[i]) + goto out_free_pages; } arg.xattr_pages = pages; @@ -1271,14 +1272,15 @@ static ssize_t _nfs42_proc_listxattrs(struct inode *inode, void *buf, *eofp = res.eof; } +out_free_pages: while (--np >= 0) { if (pages[np]) __free_page(pages[np]); } - - __free_page(res.scratch); kfree(pages); - +out_free_scratch: + __free_page(res.scratch); +out: return ret; } @@ -1287,16 +1289,45 @@ ssize_t nfs42_proc_getxattr(struct inode *inode, const char *name, void *buf, size_t buflen) { struct nfs4_exception exception = { }; - ssize_t err; + ssize_t err, np, i; + struct page **pages; + + np = nfs_page_array_len(0, buflen ?: XATTR_SIZE_MAX); + pages = kmalloc_array(np, sizeof(*pages), GFP_KERNEL); + if (!pages) + return -ENOMEM; + + for (i = 0; i < np; i++) { + pages[i] = alloc_page(GFP_KERNEL); + if (!pages[i]) { + np = i + 1; + err = -ENOMEM; + goto out; + } + } + /* + * The GETXATTR op has no length field in the call, and the + * xattr data is at the end of the reply. + * + * There is no downside in using the page-aligned length. It will + * allow receiving and caching xattrs that are too large for the + * caller but still fit in the page-rounded value. + */ do { - err = _nfs42_proc_getxattr(inode, name, buf, buflen); + err = _nfs42_proc_getxattr(inode, name, buf, buflen, + pages, np * PAGE_SIZE); if (err >= 0) break; err = nfs4_handle_exception(NFS_SERVER(inode), err, &exception); } while (exception.retry); +out: + while (--np >= 0) + __free_page(pages[np]); + kfree(pages); + return err; } diff --git a/fs/nfs/nfs42xattr.c b/fs/nfs/nfs42xattr.c index b51424ff8159..6c2ce799150f 100644 --- a/fs/nfs/nfs42xattr.c +++ b/fs/nfs/nfs42xattr.c @@ -1047,8 +1047,10 @@ out4: void nfs4_xattr_cache_exit(void) { + unregister_shrinker(&nfs4_xattr_large_entry_shrinker); unregister_shrinker(&nfs4_xattr_entry_shrinker); unregister_shrinker(&nfs4_xattr_cache_shrinker); + list_lru_destroy(&nfs4_xattr_large_entry_lru); list_lru_destroy(&nfs4_xattr_entry_lru); list_lru_destroy(&nfs4_xattr_cache_lru); kmem_cache_destroy(nfs4_xattr_cache_cachep); diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 0dc31ad2362e..c8bad735e4c1 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -191,12 +191,12 @@ #define encode_getxattr_maxsz (op_encode_hdr_maxsz + 1 + \ nfs4_xattr_name_maxsz) -#define decode_getxattr_maxsz (op_decode_hdr_maxsz + 1 + 1) +#define decode_getxattr_maxsz (op_decode_hdr_maxsz + 1 + pagepad_maxsz) #define encode_setxattr_maxsz (op_encode_hdr_maxsz + \ 1 + nfs4_xattr_name_maxsz + 1) #define decode_setxattr_maxsz (op_decode_hdr_maxsz + decode_change_info_maxsz) #define encode_listxattrs_maxsz (op_encode_hdr_maxsz + 2 + 1) -#define decode_listxattrs_maxsz (op_decode_hdr_maxsz + 2 + 1 + 1) +#define decode_listxattrs_maxsz (op_decode_hdr_maxsz + 2 + 1 + 1 + 1) #define encode_removexattr_maxsz (op_encode_hdr_maxsz + 1 + \ nfs4_xattr_name_maxsz) #define decode_removexattr_maxsz (op_decode_hdr_maxsz + \ @@ -489,6 +489,12 @@ static int decode_getxattr(struct xdr_stream *xdr, return -EIO; len = be32_to_cpup(p); + + /* + * Only check against the page length here. The actual + * requested length may be smaller, but that is only + * checked against after possibly caching a valid reply. + */ if (len > req->rq_rcv_buf.page_len) return -ERANGE; @@ -531,7 +537,7 @@ static void encode_listxattrs(struct xdr_stream *xdr, { __be32 *p; - encode_op_hdr(xdr, OP_LISTXATTRS, decode_listxattrs_maxsz + 1, hdr); + encode_op_hdr(xdr, OP_LISTXATTRS, decode_listxattrs_maxsz, hdr); p = reserve_space(xdr, 12); if (unlikely(!p)) @@ -1019,56 +1025,80 @@ static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *re return decode_op_hdr(xdr, OP_DEALLOCATE); } -static int decode_read_plus_data(struct xdr_stream *xdr, struct nfs_pgio_res *res, - uint32_t *eof) +static int decode_read_plus_data(struct xdr_stream *xdr, + struct nfs_pgio_args *args, + struct nfs_pgio_res *res) { uint32_t count, recvd; uint64_t offset; __be32 *p; p = xdr_inline_decode(xdr, 8 + 4); - if (unlikely(!p)) - return -EIO; + if (!p) + return 1; p = xdr_decode_hyper(p, &offset); count = be32_to_cpup(p); - recvd = xdr_align_data(xdr, res->count, count); - res->count += recvd; - - if (count > recvd) { - dprintk("NFS: server cheating in read reply: " - "count %u > recvd %u\n", count, recvd); - *eof = 0; + recvd = xdr_align_data(xdr, res->count, xdr_align_size(count)); + if (recvd > count) + recvd = count; + if (res->count + recvd > args->count) { + if (args->count > res->count) + res->count += args->count - res->count; return 1; } - + res->count += recvd; + if (count > recvd) + return 1; return 0; } -static int decode_read_plus_hole(struct xdr_stream *xdr, struct nfs_pgio_res *res, - uint32_t *eof) +static int decode_read_plus_hole(struct xdr_stream *xdr, + struct nfs_pgio_args *args, + struct nfs_pgio_res *res, uint32_t *eof) { uint64_t offset, length, recvd; __be32 *p; p = xdr_inline_decode(xdr, 8 + 8); - if (unlikely(!p)) - return -EIO; + if (!p) + return 1; p = xdr_decode_hyper(p, &offset); p = xdr_decode_hyper(p, &length); + if (offset != args->offset + res->count) { + /* Server returned an out-of-sequence extent */ + if (offset > args->offset + res->count || + offset + length < args->offset + res->count) { + dprintk("NFS: server returned out of sequence extent: " + "offset/size = %llu/%llu != expected %llu\n", + (unsigned long long)offset, + (unsigned long long)length, + (unsigned long long)(args->offset + + res->count)); + return 1; + } + length -= args->offset + res->count - offset; + } + if (length + res->count > args->count) { + *eof = 0; + if (unlikely(res->count >= args->count)) + return 1; + length = args->count - res->count; + } recvd = xdr_expand_hole(xdr, res->count, length); res->count += recvd; - if (recvd < length) { - *eof = 0; + if (recvd < length) return 1; - } return 0; } static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res) { + struct nfs_pgio_header *hdr = + container_of(res, struct nfs_pgio_header, res); + struct nfs_pgio_args *args = &hdr->args; uint32_t eof, segments, type; int status, i; __be32 *p; @@ -1081,6 +1111,7 @@ static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res) if (unlikely(!p)) return -EIO; + res->count = 0; eof = be32_to_cpup(p++); segments = be32_to_cpup(p++); if (segments == 0) @@ -1088,26 +1119,31 @@ static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res) for (i = 0; i < segments; i++) { p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) - return -EIO; + if (!p) + goto early_out; type = be32_to_cpup(p++); if (type == NFS4_CONTENT_DATA) - status = decode_read_plus_data(xdr, res, &eof); + status = decode_read_plus_data(xdr, args, res); else if (type == NFS4_CONTENT_HOLE) - status = decode_read_plus_hole(xdr, res, &eof); + status = decode_read_plus_hole(xdr, args, res, &eof); else return -EINVAL; if (status < 0) return status; if (status > 0) - break; + goto early_out; } out: res->eof = eof; return 0; +early_out: + if (unlikely(!i)) + return -EIO; + res->eof = 0; + return 0; } static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res) @@ -1476,18 +1512,16 @@ static void nfs4_xdr_enc_getxattr(struct rpc_rqst *req, struct xdr_stream *xdr, struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - size_t plen; + uint32_t replen; encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); + replen = hdr.replen + op_decode_hdr_maxsz + 1; encode_getxattr(xdr, args->xattr_name, &hdr); - plen = args->xattr_len ? args->xattr_len : XATTR_SIZE_MAX; - - rpc_prepare_reply_pages(req, args->xattr_pages, 0, plen, - hdr.replen); - req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES; + rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->xattr_len, + replen); encode_nops(&hdr); } @@ -1520,15 +1554,15 @@ static void nfs4_xdr_enc_listxattrs(struct rpc_rqst *req, struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; + uint32_t replen; encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); + replen = hdr.replen + op_decode_hdr_maxsz + 2 + 1; encode_listxattrs(xdr, args, &hdr); - rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->count, - hdr.replen); - req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES; + rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->count, replen); encode_nops(&hdr); } @@ -1540,7 +1574,7 @@ static int nfs4_xdr_dec_listxattrs(struct rpc_rqst *rqstp, struct compound_hdr hdr; int status; - xdr_set_scratch_buffer(xdr, page_address(res->scratch), PAGE_SIZE); + xdr_set_scratch_page(xdr, res->scratch); status = decode_compound_hdr(xdr, &hdr); if (status) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index be7915c861ce..86acffe7335c 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -1153,7 +1153,7 @@ struct nfs_server *nfs4_create_server(struct fs_context *fc) if (!server) return ERR_PTR(-ENOMEM); - server->cred = get_cred(current_cred()); + server->cred = get_cred(fc->cred); auth_probe = ctx->auth_info.flavor_len < 1; diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 9d354de613da..441a2fa073c8 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -377,10 +377,10 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt, goto out_stateowner; set_bit(NFS_SRV_SSC_COPY_STATE, &ctx->state->flags); - set_bit(NFS_OPEN_STATE, &ctx->state->flags); memcpy(&ctx->state->open_stateid.other, &stateid->other, NFS4_STATEID_OTHER_SIZE); update_open_stateid(ctx->state, stateid, NULL, filep->f_mode); + set_bit(NFS_OPEN_STATE, &ctx->state->flags); nfs_file_set_open_context(filep, ctx); put_nfs_open_context(ctx); @@ -420,7 +420,9 @@ static const struct nfs4_ssc_client_ops nfs4_ssc_clnt_ops_tbl = { */ void nfs42_ssc_register_ops(void) { +#ifdef CONFIG_NFSD_V4 nfs42_ssc_register(&nfs4_ssc_clnt_ops_tbl); +#endif } /** @@ -431,7 +433,9 @@ void nfs42_ssc_register_ops(void) */ void nfs42_ssc_unregister_ops(void) { +#ifdef CONFIG_NFSD_V4 nfs42_ssc_unregister(&nfs4_ssc_clnt_ops_tbl); +#endif } #endif /* CONFIG_NFS_V4_2 */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9e0ca9b2b210..a07530cf673d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -184,6 +184,8 @@ static int nfs4_map_errors(int err) return -EPROTONOSUPPORT; case -NFS4ERR_FILE_OPEN: return -EBUSY; + case -NFS4ERR_NOT_SAME: + return -ENOTSYNC; default: dprintk("%s could not handle NFSv4 error %d\n", __func__, -err); @@ -3534,10 +3536,8 @@ static void nfs4_close_done(struct rpc_task *task, void *data) trace_nfs4_close(state, &calldata->arg, &calldata->res, task->tk_status); /* Handle Layoutreturn errors */ - if (pnfs_roc_done(task, calldata->inode, - &calldata->arg.lr_args, - &calldata->res.lr_res, - &calldata->res.lr_ret) == -EAGAIN) + if (pnfs_roc_done(task, &calldata->arg.lr_args, &calldata->res.lr_res, + &calldata->res.lr_ret) == -EAGAIN) goto out_restart; /* hmm. we are done with the inode, and in the process of freeing @@ -4397,6 +4397,10 @@ static int _nfs4_proc_lookupp(struct inode *inode, .rpc_argp = &args, .rpc_resp = &res, }; + unsigned short task_flags = 0; + + if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL) + task_flags |= RPC_TASK_TIMEOUT; args.bitmask = nfs4_bitmask(server, label); @@ -4404,7 +4408,7 @@ static int _nfs4_proc_lookupp(struct inode *inode, dprintk("NFS call lookupp ino=0x%lx\n", inode->i_ino); status = nfs4_call_sync(clnt, server, &msg, &args.seq_args, - &res.seq_res, 0); + &res.seq_res, task_flags); dprintk("NFS reply lookupp: %d\n", status); return status; } @@ -4957,35 +4961,40 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, return err; } -static int _nfs4_proc_readdir(struct dentry *dentry, const struct cred *cred, - u64 cookie, struct page **pages, unsigned int count, bool plus) +static int _nfs4_proc_readdir(struct nfs_readdir_arg *nr_arg, + struct nfs_readdir_res *nr_res) { - struct inode *dir = d_inode(dentry); + struct inode *dir = d_inode(nr_arg->dentry); + struct nfs_server *server = NFS_SERVER(dir); struct nfs4_readdir_arg args = { .fh = NFS_FH(dir), - .pages = pages, + .pages = nr_arg->pages, .pgbase = 0, - .count = count, - .bitmask = NFS_SERVER(d_inode(dentry))->attr_bitmask, - .plus = plus, + .count = nr_arg->page_len, + .plus = nr_arg->plus, }; struct nfs4_readdir_res res; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READDIR], .rpc_argp = &args, .rpc_resp = &res, - .rpc_cred = cred, + .rpc_cred = nr_arg->cred, }; int status; - dprintk("%s: dentry = %pd2, cookie = %Lu\n", __func__, - dentry, - (unsigned long long)cookie); - nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args); + dprintk("%s: dentry = %pd2, cookie = %llu\n", __func__, + nr_arg->dentry, (unsigned long long)nr_arg->cookie); + if (!(server->caps & NFS_CAP_SECURITY_LABEL)) + args.bitmask = server->attr_bitmask_nl; + else + args.bitmask = server->attr_bitmask; + + nfs4_setup_readdir(nr_arg->cookie, nr_arg->verf, nr_arg->dentry, &args); res.pgbase = args.pgbase; - status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0); + status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, + &res.seq_res, 0); if (status >= 0) { - memcpy(NFS_I(dir)->cookieverf, res.verifier.data, NFS4_VERIFIER_SIZE); + memcpy(nr_res->verf, res.verifier.data, NFS4_VERIFIER_SIZE); status += args.pgbase; } @@ -4995,19 +5004,18 @@ static int _nfs4_proc_readdir(struct dentry *dentry, const struct cred *cred, return status; } -static int nfs4_proc_readdir(struct dentry *dentry, const struct cred *cred, - u64 cookie, struct page **pages, unsigned int count, bool plus) +static int nfs4_proc_readdir(struct nfs_readdir_arg *arg, + struct nfs_readdir_res *res) { struct nfs4_exception exception = { .interruptible = true, }; int err; do { - err = _nfs4_proc_readdir(dentry, cred, cookie, - pages, count, plus); - trace_nfs4_readdir(d_inode(dentry), err); - err = nfs4_handle_exception(NFS_SERVER(d_inode(dentry)), err, - &exception); + err = _nfs4_proc_readdir(arg, res); + trace_nfs4_readdir(d_inode(arg->dentry), err); + err = nfs4_handle_exception(NFS_SERVER(d_inode(arg->dentry)), + err, &exception); } while (exception.retry); return err; } @@ -5309,18 +5317,18 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) nfs4_read_done_cb(task, hdr); } -#ifdef CONFIG_NFS_V4_2 -static void nfs42_read_plus_support(struct nfs_server *server, struct rpc_message *msg) +#if defined CONFIG_NFS_V4_2 && defined CONFIG_NFS_V4_2_READ_PLUS +static void nfs42_read_plus_support(struct nfs_pgio_header *hdr, + struct rpc_message *msg) { - if (server->caps & NFS_CAP_READ_PLUS) + /* Note: We don't use READ_PLUS with pNFS yet */ + if (nfs_server_capable(hdr->inode, NFS_CAP_READ_PLUS) && !hdr->ds_clp) msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS]; - else - msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; } #else -static void nfs42_read_plus_support(struct nfs_server *server, struct rpc_message *msg) +static void nfs42_read_plus_support(struct nfs_pgio_header *hdr, + struct rpc_message *msg) { - msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; } #endif /* CONFIG_NFS_V4_2 */ @@ -5330,7 +5338,8 @@ static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr, hdr->timestamp = jiffies; if (!hdr->pgio_done_cb) hdr->pgio_done_cb = nfs4_read_done_cb; - nfs42_read_plus_support(NFS_SERVER(hdr->inode), msg); + msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; + nfs42_read_plus_support(hdr, msg); nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0, 0); } @@ -6373,10 +6382,8 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) trace_nfs4_delegreturn_exit(&data->args, &data->res, task->tk_status); /* Handle Layoutreturn errors */ - if (pnfs_roc_done(task, data->inode, - &data->args.lr_args, - &data->res.lr_res, - &data->res.lr_ret) == -EAGAIN) + if (pnfs_roc_done(task, &data->args.lr_args, &data->res.lr_res, + &data->res.lr_ret) == -EAGAIN) goto out_restart; switch (task->tk_status) { @@ -6430,10 +6437,10 @@ static void nfs4_delegreturn_release(void *calldata) struct nfs4_delegreturndata *data = calldata; struct inode *inode = data->inode; + if (data->lr.roc) + pnfs_roc_release(&data->lr.arg, &data->lr.res, + data->res.lr_ret); if (inode) { - if (data->lr.roc) - pnfs_roc_release(&data->lr.arg, &data->lr.res, - data->res.lr_ret); nfs_post_op_update_inode_force_wcc(inode, &data->fattr); nfs_iput_and_deactive(inode); } @@ -6509,16 +6516,14 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, nfs_fattr_init(data->res.fattr); data->timestamp = jiffies; data->rpc_status = 0; - data->lr.roc = pnfs_roc(inode, &data->lr.arg, &data->lr.res, cred); data->inode = nfs_igrab_and_active(inode); - if (data->inode) { + if (data->inode || issync) { + data->lr.roc = pnfs_roc(inode, &data->lr.arg, &data->lr.res, + cred); if (data->lr.roc) { data->args.lr_args = &data->lr.arg; data->res.lr_res = &data->lr.res; } - } else if (data->lr.roc) { - pnfs_roc_release(&data->lr.arg, &data->lr.res, 0); - data->lr.roc = false; } task_setup_data.callback_data = data; @@ -7100,9 +7105,9 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f data->arg.new_lock_owner, ret); } else data->cancelled = true; + trace_nfs4_set_lock(fl, state, &data->res.stateid, cmd, ret); rpc_put_task(task); dprintk("%s: done, ret = %d!\n", __func__, ret); - trace_nfs4_set_lock(fl, state, &data->res.stateid, cmd, ret); return ret; } @@ -7486,6 +7491,7 @@ nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp) #define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" static int nfs4_xattr_set_nfs4_acl(const struct xattr_handler *handler, + struct user_namespace *mnt_userns, struct dentry *unused, struct inode *inode, const char *key, const void *buf, size_t buflen, int flags) @@ -7508,6 +7514,7 @@ static bool nfs4_xattr_list_nfs4_acl(struct dentry *dentry) #ifdef CONFIG_NFS_V4_SECURITY_LABEL static int nfs4_xattr_set_nfs4_label(const struct xattr_handler *handler, + struct user_namespace *mnt_userns, struct dentry *unused, struct inode *inode, const char *key, const void *buf, size_t buflen, int flags) @@ -7558,6 +7565,7 @@ nfs4_listxattr_nfs4_label(struct inode *inode, char *list, size_t list_len) #ifdef CONFIG_NFS_V4_2 static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler, + struct user_namespace *mnt_userns, struct dentry *unused, struct inode *inode, const char *key, const void *buf, size_t buflen, int flags) @@ -9651,6 +9659,8 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server, if (res.notification != args.notify_types) pdev->nocache = 1; + trace_nfs4_getdeviceinfo(server, &pdev->dev_id, status); + dprintk("<-- %s status=%d\n", __func__, status); return status; diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index b996ee23f1ba..3de425f59b3a 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -34,7 +34,7 @@ enum nfs4_slot_tbl_state { NFS4_SLOT_TBL_DRAINING, }; -#define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, 8*sizeof(long)) +#define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, BITS_PER_LONG) struct nfs4_slot_table { struct nfs4_session *session; /* Parent session */ struct nfs4_slot *slots; /* seqid per slot */ diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 93f5c1678ec2..d09bcfd7db89 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -67,7 +67,7 @@ static void nfs4_evict_inode(struct inode *inode) nfs_inode_evict_delegation(inode); /* Note that above delegreturn would trigger pnfs return-on-close */ pnfs_return_layout(inode); - pnfs_destroy_layout(NFS_I(inode)); + pnfs_destroy_layout_final(NFS_I(inode)); /* First call standard NFS clear_inode() code */ nfs_clear_inode(inode); nfs4_xattr_cache_zap(inode); @@ -227,7 +227,7 @@ int nfs4_try_get_tree(struct fs_context *fc) fc, ctx->nfs_server.hostname, ctx->nfs_server.export_path); if (err) { - nfs_errorf(fc, "NFS4: Couldn't follow remote path"); + nfs_ferrorf(fc, MOUNT, "NFS4: Couldn't follow remote path"); dfprintk(MOUNT, "<-- nfs4_try_get_tree() = %d [error]\n", err); } else { dfprintk(MOUNT, "<-- nfs4_try_get_tree() = 0\n"); @@ -250,7 +250,7 @@ int nfs4_get_referral_tree(struct fs_context *fc) fc, ctx->nfs_server.hostname, ctx->nfs_server.export_path); if (err) { - nfs_errorf(fc, "NFS4: Couldn't follow remote path"); + nfs_ferrorf(fc, MOUNT, "NFS4: Couldn't follow remote path"); dfprintk(MOUNT, "<-- nfs4_get_referral_tree() = %d [error]\n", err); } else { dfprintk(MOUNT, "<-- nfs4_get_referral_tree() = 0\n"); diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 484c1da96dea..48d761e593fb 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -2189,6 +2189,81 @@ DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_write_done); DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_read_pagelist); DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_write_pagelist); +DECLARE_EVENT_CLASS(nfs4_deviceid_event, + TP_PROTO( + const struct nfs_client *clp, + const struct nfs4_deviceid *deviceid + ), + + TP_ARGS(clp, deviceid), + + TP_STRUCT__entry( + __string(dstaddr, clp->cl_hostname) + __array(unsigned char, deviceid, NFS4_DEVICEID4_SIZE) + ), + + TP_fast_assign( + __assign_str(dstaddr, clp->cl_hostname); + memcpy(__entry->deviceid, deviceid->data, + NFS4_DEVICEID4_SIZE); + ), + + TP_printk( + "deviceid=%s, dstaddr=%s", + __print_hex(__entry->deviceid, NFS4_DEVICEID4_SIZE), + __get_str(dstaddr) + ) +); +#define DEFINE_PNFS_DEVICEID_EVENT(name) \ + DEFINE_EVENT(nfs4_deviceid_event, name, \ + TP_PROTO(const struct nfs_client *clp, \ + const struct nfs4_deviceid *deviceid \ + ), \ + TP_ARGS(clp, deviceid)) +DEFINE_PNFS_DEVICEID_EVENT(nfs4_deviceid_free); + +DECLARE_EVENT_CLASS(nfs4_deviceid_status, + TP_PROTO( + const struct nfs_server *server, + const struct nfs4_deviceid *deviceid, + int status + ), + + TP_ARGS(server, deviceid, status), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, status) + __string(dstaddr, server->nfs_client->cl_hostname) + __array(unsigned char, deviceid, NFS4_DEVICEID4_SIZE) + ), + + TP_fast_assign( + __entry->dev = server->s_dev; + __entry->status = status; + __assign_str(dstaddr, server->nfs_client->cl_hostname); + memcpy(__entry->deviceid, deviceid->data, + NFS4_DEVICEID4_SIZE); + ), + + TP_printk( + "dev=%02x:%02x: deviceid=%s, dstaddr=%s, status=%d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __print_hex(__entry->deviceid, NFS4_DEVICEID4_SIZE), + __get_str(dstaddr), + __entry->status + ) +); +#define DEFINE_PNFS_DEVICEID_STATUS(name) \ + DEFINE_EVENT(nfs4_deviceid_status, name, \ + TP_PROTO(const struct nfs_server *server, \ + const struct nfs4_deviceid *deviceid, \ + int status \ + ), \ + TP_ARGS(server, deviceid, status)) +DEFINE_PNFS_DEVICEID_STATUS(nfs4_getdeviceinfo); +DEFINE_PNFS_DEVICEID_STATUS(nfs4_find_deviceid); + DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event, TP_PROTO( const struct nfs_pgio_header *hdr diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index c6dbfcae7517..ac6b79ee9355 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -84,6 +84,7 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, /* lock,open owner id: * we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT >> 2) */ +#define pagepad_maxsz (1) #define open_owner_id_maxsz (1 + 2 + 1 + 1 + 2) #define lock_owner_id_maxsz (1 + 1 + 4) #define decode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) @@ -215,14 +216,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, nfs4_fattr_bitmap_maxsz) #define encode_read_maxsz (op_encode_hdr_maxsz + \ encode_stateid_maxsz + 3) -#define decode_read_maxsz (op_decode_hdr_maxsz + 2 + 1) +#define decode_read_maxsz (op_decode_hdr_maxsz + 2 + pagepad_maxsz) #define encode_readdir_maxsz (op_encode_hdr_maxsz + \ 2 + encode_verifier_maxsz + 5 + \ nfs4_label_maxsz) #define decode_readdir_maxsz (op_decode_hdr_maxsz + \ - decode_verifier_maxsz + 1) + decode_verifier_maxsz + pagepad_maxsz) #define encode_readlink_maxsz (op_encode_hdr_maxsz) -#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1 + 1) +#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1 + pagepad_maxsz) #define encode_write_maxsz (op_encode_hdr_maxsz + \ encode_stateid_maxsz + 4) #define decode_write_maxsz (op_decode_hdr_maxsz + \ @@ -284,14 +285,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, #define decode_delegreturn_maxsz (op_decode_hdr_maxsz) #define encode_getacl_maxsz (encode_getattr_maxsz) #define decode_getacl_maxsz (op_decode_hdr_maxsz + \ - nfs4_fattr_bitmap_maxsz + 1 + 1) + nfs4_fattr_bitmap_maxsz + 1 + pagepad_maxsz) #define encode_setacl_maxsz (op_encode_hdr_maxsz + \ encode_stateid_maxsz + 3) #define decode_setacl_maxsz (decode_setattr_maxsz) #define encode_fs_locations_maxsz \ (encode_getattr_maxsz) #define decode_fs_locations_maxsz \ - (1) + (pagepad_maxsz) #define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz) #define decode_secinfo_maxsz (op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4)) @@ -393,12 +394,13 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, /* devaddr4 payload is read into page */ \ 1 /* notification bitmap length */ + \ 1 /* notification bitmap, word 0 */ + \ - 1 /* possible XDR padding */) + pagepad_maxsz /* possible XDR padding */) #define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \ encode_stateid_maxsz) #define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \ decode_stateid_maxsz + \ - XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE) + 1) + XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE) + \ + pagepad_maxsz) #define encode_layoutcommit_maxsz (op_encode_hdr_maxsz + \ 2 /* offset */ + \ 2 /* length */ + \ @@ -2342,7 +2344,7 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr, encode_layoutget(xdr, args->lg_args, &hdr); rpc_prepare_reply_pages(req, args->lg_args->layout.pages, 0, args->lg_args->layout.pglen, - hdr.replen); + hdr.replen - pagepad_maxsz); } encode_nops(&hdr); } @@ -2388,7 +2390,7 @@ static void nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, encode_layoutget(xdr, args->lg_args, &hdr); rpc_prepare_reply_pages(req, args->lg_args->layout.pages, 0, args->lg_args->layout.pglen, - hdr.replen); + hdr.replen - pagepad_maxsz); } encode_nops(&hdr); } @@ -2499,7 +2501,7 @@ static void nfs4_xdr_enc_readlink(struct rpc_rqst *req, struct xdr_stream *xdr, encode_readlink(xdr, args, req, &hdr); rpc_prepare_reply_pages(req, args->pages, args->pgbase, - args->pglen, hdr.replen); + args->pglen, hdr.replen - pagepad_maxsz); encode_nops(&hdr); } @@ -2520,7 +2522,7 @@ static void nfs4_xdr_enc_readdir(struct rpc_rqst *req, struct xdr_stream *xdr, encode_readdir(xdr, args, req, &hdr); rpc_prepare_reply_pages(req, args->pages, args->pgbase, - args->count, hdr.replen); + args->count, hdr.replen - pagepad_maxsz); encode_nops(&hdr); } @@ -2541,7 +2543,7 @@ static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr, encode_read(xdr, args, &hdr); rpc_prepare_reply_pages(req, args->pages, args->pgbase, - args->count, hdr.replen); + args->count, hdr.replen - pagepad_maxsz); req->rq_rcv_buf.flags |= XDRBUF_READ; encode_nops(&hdr); } @@ -2588,7 +2590,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, ARRAY_SIZE(nfs4_acl_bitmap), &hdr); rpc_prepare_reply_pages(req, args->acl_pages, 0, - args->acl_len, replen + 1); + args->acl_len, replen); encode_nops(&hdr); } @@ -2810,7 +2812,7 @@ static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, } rpc_prepare_reply_pages(req, (struct page **)&args->page, 0, - PAGE_SIZE, replen + 1); + PAGE_SIZE, replen); encode_nops(&hdr); } @@ -3009,15 +3011,19 @@ static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; + uint32_t replen; encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); + + replen = hdr.replen + op_decode_hdr_maxsz + 2; + encode_getdeviceinfo(xdr, args, &hdr); - /* set up reply kvec. Subtract notification bitmap max size (2) - * so that notification bitmap is put in xdr_buf tail */ + /* set up reply kvec. device_addr4 opaque data is read into the + * pages */ rpc_prepare_reply_pages(req, args->pdev->pages, args->pdev->pgbase, - args->pdev->pglen, hdr.replen - 2); + args->pdev->pglen, replen); encode_nops(&hdr); } @@ -3039,7 +3045,7 @@ static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req, encode_layoutget(xdr, args, &hdr); rpc_prepare_reply_pages(req, args->layout.pages, 0, - args->layout.pglen, hdr.replen); + args->layout.pglen, hdr.replen - pagepad_maxsz); encode_nops(&hdr); } @@ -5331,11 +5337,11 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, res->acl_len = attrlen; /* Check for receive buffer overflow */ - if (res->acl_len > (xdr->nwords << 2) || + if (res->acl_len > xdr_stream_remaining(xdr) || res->acl_len + res->acl_data_offset > xdr->buf->page_len) { res->acl_flags |= NFS4_ACL_TRUNC; - dprintk("NFS: acl reply: attrlen %u > page_len %u\n", - attrlen, xdr->nwords << 2); + dprintk("NFS: acl reply: attrlen %u > page_len %zu\n", + attrlen, xdr_stream_remaining(xdr)); } } else status = -EOPNOTSUPP; @@ -6403,10 +6409,8 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr, struct compound_hdr hdr; int status; - if (res->acl_scratch != NULL) { - void *p = page_address(res->acl_scratch); - xdr_set_scratch_buffer(xdr, p, PAGE_SIZE); - } + if (res->acl_scratch != NULL) + xdr_set_scratch_page(xdr, res->acl_scratch); status = decode_compound_hdr(xdr, &hdr); if (status) goto out; diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 8d3278805602..fa148308822c 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -88,7 +88,13 @@ #define NFS_ROOT "/tftpboot/%s" /* Default NFSROOT mount options. */ +#if defined(CONFIG_NFS_V2) #define NFS_DEF_OPTIONS "vers=2,tcp,rsize=4096,wsize=4096" +#elif defined(CONFIG_NFS_V3) +#define NFS_DEF_OPTIONS "vers=3,tcp,rsize=4096,wsize=4096" +#else +#define NFS_DEF_OPTIONS "vers=4,tcp,rsize=4096,wsize=4096" +#endif /* Parameters passed from the kernel command line */ static char nfs_root_parms[NFS_MAXPATHLEN + 1] __initdata = ""; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 6985cacf4700..78c9c4bdef2b 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -31,13 +31,29 @@ static struct kmem_cache *nfs_page_cachep; static const struct rpc_call_ops nfs_pgio_common_ops; +static struct nfs_pgio_mirror * +nfs_pgio_get_mirror(struct nfs_pageio_descriptor *desc, u32 idx) +{ + if (desc->pg_ops->pg_get_mirror) + return desc->pg_ops->pg_get_mirror(desc, idx); + return &desc->pg_mirrors[0]; +} + struct nfs_pgio_mirror * nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc) { - return &desc->pg_mirrors[desc->pg_mirror_idx]; + return nfs_pgio_get_mirror(desc, desc->pg_mirror_idx); } EXPORT_SYMBOL_GPL(nfs_pgio_current_mirror); +static u32 +nfs_pgio_set_current_mirror(struct nfs_pageio_descriptor *desc, u32 idx) +{ + if (desc->pg_ops->pg_set_mirror) + return desc->pg_ops->pg_set_mirror(desc, idx); + return desc->pg_mirror_idx; +} + void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr, void (*release)(struct nfs_pgio_header *hdr)) @@ -1259,7 +1275,7 @@ static void nfs_pageio_error_cleanup(struct nfs_pageio_descriptor *desc) return; for (midx = 0; midx < desc->pg_mirror_count; midx++) { - mirror = &desc->pg_mirrors[midx]; + mirror = nfs_pgio_get_mirror(desc, midx); desc->pg_completion_ops->error_cleanup(&mirror->pg_list, desc->pg_error); } @@ -1293,12 +1309,12 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, goto out_failed; } - desc->pg_mirror_idx = midx; + nfs_pgio_set_current_mirror(desc, midx); if (!nfs_pageio_add_request_mirror(desc, dupreq)) goto out_cleanup_subreq; } - desc->pg_mirror_idx = 0; + nfs_pgio_set_current_mirror(desc, 0); if (!nfs_pageio_add_request_mirror(desc, req)) goto out_failed; @@ -1320,10 +1336,12 @@ out_failed: static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc, u32 mirror_idx) { - struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[mirror_idx]; - u32 restore_idx = desc->pg_mirror_idx; + struct nfs_pgio_mirror *mirror; + u32 restore_idx; + + restore_idx = nfs_pgio_set_current_mirror(desc, mirror_idx); + mirror = nfs_pgio_current_mirror(desc); - desc->pg_mirror_idx = mirror_idx; for (;;) { nfs_pageio_doio(desc); if (desc->pg_error < 0 || !mirror->pg_recoalesce) @@ -1331,7 +1349,7 @@ static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc, if (!nfs_do_recoalesce(desc)) break; } - desc->pg_mirror_idx = restore_idx; + nfs_pgio_set_current_mirror(desc, restore_idx); } /* @@ -1405,7 +1423,7 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) u32 midx; for (midx = 0; midx < desc->pg_mirror_count; midx++) { - mirror = &desc->pg_mirrors[midx]; + mirror = nfs_pgio_get_mirror(desc, midx); if (!list_empty(&mirror->pg_list)) { prev = nfs_list_entry(mirror->pg_list.prev); if (index != prev->wb_index + 1) { diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 0e50b9d45c32..af64b4e6fd1f 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -294,6 +294,7 @@ void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) { struct inode *inode; + unsigned long i_state; if (!lo) return; @@ -304,8 +305,12 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) if (!list_empty(&lo->plh_segs)) WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n"); pnfs_detach_layout_hdr(lo); + i_state = inode->i_state; spin_unlock(&inode->i_lock); pnfs_free_layout_hdr(lo); + /* Notify pnfs_destroy_layout_final() that we're done */ + if (i_state & (I_FREEING | I_CLEAR)) + wake_up_var(lo); } } @@ -319,6 +324,21 @@ pnfs_grab_inode_layout_hdr(struct pnfs_layout_hdr *lo) return NULL; } +/* + * Compare 2 layout stateid sequence ids, to see which is newer, + * taking into account wraparound issues. + */ +static bool pnfs_seqid_is_newer(u32 s1, u32 s2) +{ + return (s32)(s1 - s2) > 0; +} + +static void pnfs_barrier_update(struct pnfs_layout_hdr *lo, u32 newseq) +{ + if (pnfs_seqid_is_newer(newseq, lo->plh_barrier)) + lo->plh_barrier = newseq; +} + static void pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode, u32 seq) @@ -330,6 +350,7 @@ pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode, if (seq != 0) { WARN_ON_ONCE(lo->plh_return_seq != 0 && lo->plh_return_seq != seq); lo->plh_return_seq = seq; + pnfs_barrier_update(lo, seq); } } @@ -634,15 +655,6 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, return rv; } -/* - * Compare 2 layout stateid sequence ids, to see which is newer, - * taking into account wraparound issues. - */ -static bool pnfs_seqid_is_newer(u32 s1, u32 s2) -{ - return (s32)(s1 - s2) > 0; -} - static bool pnfs_should_free_range(const struct pnfs_layout_range *lseg_range, const struct pnfs_layout_range *recall_range) @@ -734,8 +746,7 @@ pnfs_free_lseg_list(struct list_head *free_me) } } -void -pnfs_destroy_layout(struct nfs_inode *nfsi) +static struct pnfs_layout_hdr *__pnfs_destroy_layout(struct nfs_inode *nfsi) { struct pnfs_layout_hdr *lo; LIST_HEAD(tmp_list); @@ -753,9 +764,34 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) pnfs_put_layout_hdr(lo); } else spin_unlock(&nfsi->vfs_inode.i_lock); + return lo; +} + +void pnfs_destroy_layout(struct nfs_inode *nfsi) +{ + __pnfs_destroy_layout(nfsi); } EXPORT_SYMBOL_GPL(pnfs_destroy_layout); +static bool pnfs_layout_removed(struct nfs_inode *nfsi, + struct pnfs_layout_hdr *lo) +{ + bool ret; + + spin_lock(&nfsi->vfs_inode.i_lock); + ret = nfsi->layout != lo; + spin_unlock(&nfsi->vfs_inode.i_lock); + return ret; +} + +void pnfs_destroy_layout_final(struct nfs_inode *nfsi) +{ + struct pnfs_layout_hdr *lo = __pnfs_destroy_layout(nfsi); + + if (lo) + wait_var_event(lo, pnfs_layout_removed(nfsi, lo)); +} + static bool pnfs_layout_add_bulk_destroy_list(struct inode *inode, struct list_head *layout_list) @@ -955,8 +991,7 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, new_barrier = be32_to_cpu(new->seqid); else if (new_barrier == 0) return; - if (pnfs_seqid_is_newer(new_barrier, lo->plh_barrier)) - lo->plh_barrier = new_barrier; + pnfs_barrier_update(lo, new_barrier); } static bool @@ -965,7 +1000,7 @@ pnfs_layout_stateid_blocked(const struct pnfs_layout_hdr *lo, { u32 seqid = be32_to_cpu(stateid->seqid); - return !pnfs_seqid_is_newer(seqid, lo->plh_barrier); + return !pnfs_seqid_is_newer(seqid, lo->plh_barrier) && lo->plh_barrier; } /* lget is set to 1 if called from inside send_layoutget call chain */ @@ -1123,7 +1158,7 @@ void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo, LIST_HEAD(freeme); spin_lock(&inode->i_lock); - if (!pnfs_layout_is_valid(lo) || !arg_stateid || + if (!pnfs_layout_is_valid(lo) || !nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid)) goto out_unlock; if (stateid) { @@ -1154,20 +1189,17 @@ pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo, return false; set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); pnfs_get_layout_hdr(lo); + nfs4_stateid_copy(stateid, &lo->plh_stateid); + *cred = get_cred(lo->plh_lc_cred); if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) { - nfs4_stateid_copy(stateid, &lo->plh_stateid); - *cred = get_cred(lo->plh_lc_cred); if (lo->plh_return_seq != 0) stateid->seqid = cpu_to_be32(lo->plh_return_seq); if (iomode != NULL) *iomode = lo->plh_return_iomode; pnfs_clear_layoutreturn_info(lo); - return true; - } - nfs4_stateid_copy(stateid, &lo->plh_stateid); - *cred = get_cred(lo->plh_lc_cred); - if (iomode != NULL) + } else if (iomode != NULL) *iomode = IOMODE_ANY; + pnfs_barrier_update(lo, be32_to_cpu(stateid->seqid)); return true; } @@ -1480,10 +1512,8 @@ out_noroc: return false; } -int pnfs_roc_done(struct rpc_task *task, struct inode *inode, - struct nfs4_layoutreturn_args **argpp, - struct nfs4_layoutreturn_res **respp, - int *ret) +int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp, + struct nfs4_layoutreturn_res **respp, int *ret) { struct nfs4_layoutreturn_args *arg = *argpp; int retval = -EAGAIN; @@ -1516,7 +1546,7 @@ int pnfs_roc_done(struct rpc_task *task, struct inode *inode, return 0; case -NFS4ERR_OLD_STATEID: if (!nfs4_layout_refresh_old_stateid(&arg->stateid, - &arg->range, inode)) + &arg->range, arg->inode)) break; *ret = -NFS4ERR_NOMATCHING_LAYOUT; return -EAGAIN; @@ -1531,23 +1561,28 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args, int ret) { struct pnfs_layout_hdr *lo = args->layout; - const nfs4_stateid *arg_stateid = NULL; + struct inode *inode = args->inode; const nfs4_stateid *res_stateid = NULL; struct nfs4_xdr_opaque_data *ld_private = args->ld_private; switch (ret) { case -NFS4ERR_NOMATCHING_LAYOUT: + spin_lock(&inode->i_lock); + if (pnfs_layout_is_valid(lo) && + nfs4_stateid_match_other(&args->stateid, &lo->plh_stateid)) + pnfs_set_plh_return_info(lo, args->range.iomode, 0); + pnfs_clear_layoutreturn_waitbit(lo); + spin_unlock(&inode->i_lock); break; case 0: if (res->lrs_present) res_stateid = &res->stateid; fallthrough; default: - arg_stateid = &args->stateid; + pnfs_layoutreturn_free_lsegs(lo, &args->stateid, &args->range, + res_stateid); } trace_nfs4_layoutreturn_on_close(args->inode, &args->stateid, ret); - pnfs_layoutreturn_free_lsegs(lo, arg_stateid, &args->range, - res_stateid); if (ld_private && ld_private->ops && ld_private->ops->free) ld_private->ops->free(ld_private); pnfs_put_layout_hdr(lo); @@ -1877,6 +1912,11 @@ static void nfs_layoutget_end(struct pnfs_layout_hdr *lo) wake_up_var(&lo->plh_outstanding); } +static bool pnfs_is_first_layoutget(struct pnfs_layout_hdr *lo) +{ + return test_bit(NFS_LAYOUT_FIRST_LAYOUTGET, &lo->plh_flags); +} + static void pnfs_clear_first_layoutget(struct pnfs_layout_hdr *lo) { unsigned long *bitlock = &lo->plh_flags; @@ -1986,6 +2026,27 @@ lookup_again: goto lookup_again; } + /* + * Because we free lsegs when sending LAYOUTRETURN, we need to wait + * for LAYOUTRETURN. + */ + if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { + spin_unlock(&ino->i_lock); + dprintk("%s wait for layoutreturn\n", __func__); + lseg = ERR_PTR(pnfs_prepare_to_retry_layoutget(lo)); + if (!IS_ERR(lseg)) { + pnfs_put_layout_hdr(lo); + dprintk("%s retrying\n", __func__); + trace_pnfs_update_layout(ino, pos, count, iomode, lo, + lseg, + PNFS_UPDATE_LAYOUT_RETRY); + goto lookup_again; + } + trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, + PNFS_UPDATE_LAYOUT_RETURN); + goto out_put_layout_hdr; + } + lseg = pnfs_find_lseg(lo, &arg, strict_iomode); if (lseg) { trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, @@ -2038,28 +2099,6 @@ lookup_again: nfs4_stateid_copy(&stateid, &lo->plh_stateid); } - /* - * Because we free lsegs before sending LAYOUTRETURN, we need to wait - * for LAYOUTRETURN even if first is true. - */ - if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { - spin_unlock(&ino->i_lock); - dprintk("%s wait for layoutreturn\n", __func__); - lseg = ERR_PTR(pnfs_prepare_to_retry_layoutget(lo)); - if (!IS_ERR(lseg)) { - if (first) - pnfs_clear_first_layoutget(lo); - pnfs_put_layout_hdr(lo); - dprintk("%s retrying\n", __func__); - trace_pnfs_update_layout(ino, pos, count, iomode, lo, - lseg, PNFS_UPDATE_LAYOUT_RETRY); - goto lookup_again; - } - trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, - PNFS_UPDATE_LAYOUT_RETURN); - goto out_put_layout_hdr; - } - if (pnfs_layoutgets_blocked(lo)) { trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_BLOCKED); @@ -2213,6 +2252,7 @@ static void _lgopen_prepare_attached(struct nfs4_opendata *data, &rng, GFP_KERNEL); if (!lgp) { pnfs_clear_first_layoutget(lo); + nfs_layoutget_end(lo); pnfs_put_layout_hdr(lo); return; } @@ -2351,23 +2391,34 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) goto out_forget; } - if (!pnfs_layout_is_valid(lo)) { - /* We have a completely new layout */ - pnfs_set_layout_stateid(lo, &res->stateid, lgp->cred, true); - } else if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) { + if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) { /* existing state ID, make sure the sequence number matches. */ if (pnfs_layout_stateid_blocked(lo, &res->stateid)) { + if (!pnfs_layout_is_valid(lo) && + pnfs_is_first_layoutget(lo)) + lo->plh_barrier = 0; dprintk("%s forget reply due to sequence\n", __func__); goto out_forget; } pnfs_set_layout_stateid(lo, &res->stateid, lgp->cred, false); - } else { + } else if (pnfs_layout_is_valid(lo)) { /* * We got an entirely new state ID. Mark all segments for the * inode invalid, and retry the layoutget */ - pnfs_mark_layout_stateid_invalid(lo, &free_me); + struct pnfs_layout_range range = { + .iomode = IOMODE_ANY, + .length = NFS4_MAX_UINT64, + }; + pnfs_set_plh_return_info(lo, IOMODE_ANY, 0); + pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, + &range, 0); goto out_forget; + } else { + /* We have a completely new layout */ + if (!pnfs_is_first_layoutget(lo)) + goto out_forget; + pnfs_set_layout_stateid(lo, &res->stateid, lgp->cred, true); } pnfs_get_lseg(lseg); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 2661c44c62db..d810ae674f4e 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -51,6 +51,8 @@ struct nfs4_pnfs_ds_addr { size_t da_addrlen; struct list_head da_node; /* nfs4_pnfs_dev_hlist dev_dslist */ char *da_remotestr; /* human readable addr+port */ + const char *da_netid; + int da_transport; }; struct nfs4_pnfs_ds { @@ -266,6 +268,7 @@ struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp); void pnfs_layoutget_free(struct nfs4_layoutget *lgp); void pnfs_free_lseg_list(struct list_head *tmp_list); void pnfs_destroy_layout(struct nfs_inode *); +void pnfs_destroy_layout_final(struct nfs_inode *); void pnfs_destroy_all_layouts(struct nfs_client *); int pnfs_destroy_layouts_byfsid(struct nfs_client *clp, struct nfs_fsid *fsid, @@ -294,10 +297,8 @@ bool pnfs_roc(struct inode *ino, struct nfs4_layoutreturn_args *args, struct nfs4_layoutreturn_res *res, const struct cred *cred); -int pnfs_roc_done(struct rpc_task *task, struct inode *inode, - struct nfs4_layoutreturn_args **argpp, - struct nfs4_layoutreturn_res **respp, - int *ret); +int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp, + struct nfs4_layoutreturn_res **respp, int *ret); void pnfs_roc_release(struct nfs4_layoutreturn_args *args, struct nfs4_layoutreturn_res *res, int ret); @@ -710,6 +711,10 @@ static inline void pnfs_destroy_layout(struct nfs_inode *nfsi) { } +static inline void pnfs_destroy_layout_final(struct nfs_inode *nfsi) +{ +} + static inline struct pnfs_layout_segment * pnfs_get_lseg(struct pnfs_layout_segment *lseg) { @@ -765,7 +770,7 @@ pnfs_roc(struct inode *ino, } static inline int -pnfs_roc_done(struct rpc_task *task, struct inode *inode, +pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp, struct nfs4_layoutreturn_res **respp, int *ret) diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index 537b80d693f1..ddbbf4fcda86 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -34,6 +34,8 @@ #include "internal.h" #include "pnfs.h" +#include "nfs4trace.h" + #define NFSDBG_FACILITY NFSDBG_PNFS /* @@ -192,24 +194,28 @@ nfs4_find_get_deviceid(struct nfs_server *server, d = __nfs4_find_get_deviceid(server, id, hash); if (d) - return d; + goto found; new = nfs4_get_device_info(server, id, cred, gfp_mask); - if (!new) + if (!new) { + trace_nfs4_find_deviceid(server, id, -ENOENT); return new; + } spin_lock(&nfs4_deviceid_lock); d = __nfs4_find_get_deviceid(server, id, hash); if (d) { spin_unlock(&nfs4_deviceid_lock); server->pnfs_curr_ld->free_deviceid_node(new); - return d; + } else { + atomic_inc(&new->ref); + hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]); + spin_unlock(&nfs4_deviceid_lock); + d = new; } - hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]); - atomic_inc(&new->ref); - spin_unlock(&nfs4_deviceid_lock); - - return new; +found: + trace_nfs4_find_deviceid(server, id, 0); + return d; } EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid); @@ -278,6 +284,7 @@ nfs4_put_deviceid_node(struct nfs4_deviceid_node *d) } if (!atomic_dec_and_test(&d->ref)) return false; + trace_nfs4_deviceid_free(d->nfs_client, &d->deviceid); d->ld->free_deviceid_node(d); return true; } diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 679767ac258d..49d3389bd813 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -78,22 +78,18 @@ void pnfs_generic_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo) { - struct pnfs_layout_segment *freeme = NULL; + struct pnfs_commit_bucket *bucket = NULL; if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags)) goto out; cinfo->ds->nwritten--; - if (list_is_singular(&req->wb_list)) { - struct pnfs_commit_bucket *bucket; - + if (list_is_singular(&req->wb_list)) bucket = list_first_entry(&req->wb_list, - struct pnfs_commit_bucket, - written); - freeme = pnfs_free_bucket_lseg(bucket); - } + struct pnfs_commit_bucket, written); out: nfs_request_remove_commit_list(req, cinfo); - pnfs_put_lseg(freeme); + if (bucket) + pnfs_put_lseg(pnfs_free_bucket_lseg(bucket)); } EXPORT_SYMBOL_GPL(pnfs_generic_clear_request_commit); @@ -407,12 +403,16 @@ pnfs_bucket_get_committing(struct list_head *head, struct pnfs_commit_bucket *bucket, struct nfs_commit_info *cinfo) { + struct pnfs_layout_segment *lseg; struct list_head *pos; list_for_each(pos, &bucket->committing) cinfo->ds->ncommitting--; list_splice_init(&bucket->committing, head); - return pnfs_free_bucket_lseg(bucket); + lseg = pnfs_free_bucket_lseg(bucket); + if (!lseg) + lseg = pnfs_get_lseg(bucket->lseg); + return lseg; } static struct nfs_commit_data * @@ -424,8 +424,6 @@ pnfs_bucket_fetch_commitdata(struct pnfs_commit_bucket *bucket, if (!data) return NULL; data->lseg = pnfs_bucket_get_committing(&data->pages, bucket, cinfo); - if (!data->lseg) - data->lseg = pnfs_get_lseg(bucket->lseg); return data; } @@ -661,6 +659,21 @@ _data_server_lookup_locked(const struct list_head *dsaddrs) return NULL; } +static struct nfs4_pnfs_ds_addr *nfs4_pnfs_ds_addr_alloc(gfp_t gfp_flags) +{ + struct nfs4_pnfs_ds_addr *da = kzalloc(sizeof(*da), gfp_flags); + if (da) + INIT_LIST_HEAD(&da->da_node); + return da; +} + +static void nfs4_pnfs_ds_addr_free(struct nfs4_pnfs_ds_addr *da) +{ + kfree(da->da_remotestr); + kfree(da->da_netid); + kfree(da); +} + static void destroy_ds(struct nfs4_pnfs_ds *ds) { struct nfs4_pnfs_ds_addr *da; @@ -676,8 +689,7 @@ static void destroy_ds(struct nfs4_pnfs_ds *ds) struct nfs4_pnfs_ds_addr, da_node); list_del_init(&da->da_node); - kfree(da->da_remotestr); - kfree(da); + nfs4_pnfs_ds_addr_free(da); } kfree(ds->ds_remotestr); @@ -854,12 +866,17 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv, if (!IS_ERR(clp)) { struct xprt_create xprt_args = { - .ident = XPRT_TRANSPORT_TCP, + .ident = da->da_transport, .net = clp->cl_net, .dstaddr = (struct sockaddr *)&da->da_addr, .addrlen = da->da_addrlen, .servername = clp->cl_hostname, }; + + if (da->da_transport != clp->cl_proto) + continue; + if (da->da_addr.ss_family != clp->cl_addr.ss_family) + continue; /* Add this address as an alias */ rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args, rpc_clnt_test_and_add_xprt, NULL); @@ -867,7 +884,7 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv, } clp = get_v3_ds_connect(mds_srv, (struct sockaddr *)&da->da_addr, - da->da_addrlen, IPPROTO_TCP, + da->da_addrlen, da->da_transport, timeo, retrans); if (IS_ERR(clp)) continue; @@ -905,7 +922,7 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, if (!IS_ERR(clp) && clp->cl_mvops->session_trunk) { struct xprt_create xprt_args = { - .ident = XPRT_TRANSPORT_TCP, + .ident = da->da_transport, .net = clp->cl_net, .dstaddr = (struct sockaddr *)&da->da_addr, .addrlen = da->da_addrlen, @@ -913,17 +930,21 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, }; struct nfs4_add_xprt_data xprtdata = { .clp = clp, - .cred = nfs4_get_clid_cred(clp), }; struct rpc_add_xprt_test rpcdata = { .add_xprt_test = clp->cl_mvops->session_trunk, .data = &xprtdata, }; + if (da->da_transport != clp->cl_proto) + continue; + if (da->da_addr.ss_family != clp->cl_addr.ss_family) + continue; /** * Test this address for session trunking and * add as an alias */ + xprtdata.cred = nfs4_get_clid_cred(clp), rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args, rpc_clnt_setup_test_and_add_xprt, &rpcdata); @@ -932,8 +953,9 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, } else { clp = nfs4_set_ds_client(mds_srv, (struct sockaddr *)&da->da_addr, - da->da_addrlen, IPPROTO_TCP, - timeo, retrans, minor_version); + da->da_addrlen, + da->da_transport, timeo, + retrans, minor_version); if (IS_ERR(clp)) continue; @@ -1021,55 +1043,26 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags) struct nfs4_pnfs_ds_addr *da = NULL; char *buf, *portstr; __be16 port; - int nlen, rlen; + ssize_t nlen, rlen; int tmp[2]; - __be32 *p; - char *netid, *match_netid; - size_t len, match_netid_len; + char *netid; + size_t len; char *startsep = ""; char *endsep = ""; /* r_netid */ - p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) + nlen = xdr_stream_decode_string_dup(xdr, &netid, XDR_MAX_NETOBJ, + gfp_flags); + if (unlikely(nlen < 0)) goto out_err; - nlen = be32_to_cpup(p++); - - p = xdr_inline_decode(xdr, nlen); - if (unlikely(!p)) - goto out_err; - - netid = kmalloc(nlen+1, gfp_flags); - if (unlikely(!netid)) - goto out_err; - - netid[nlen] = '\0'; - memcpy(netid, p, nlen); /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */ - p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) - goto out_free_netid; - rlen = be32_to_cpup(p); - - p = xdr_inline_decode(xdr, rlen); - if (unlikely(!p)) - goto out_free_netid; - /* port is ".ABC.DEF", 8 chars max */ - if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) { - dprintk("%s: Invalid address, length %d\n", __func__, - rlen); - goto out_free_netid; - } - buf = kmalloc(rlen + 1, gfp_flags); - if (!buf) { - dprintk("%s: Not enough memory\n", __func__); + rlen = xdr_stream_decode_string_dup(xdr, &buf, INET6_ADDRSTRLEN + + IPV6_SCOPE_ID_LEN + 8, gfp_flags); + if (unlikely(rlen < 0)) goto out_free_netid; - } - buf[rlen] = '\0'; - memcpy(buf, p, rlen); /* replace port '.' with '-' */ portstr = strrchr(buf, '.'); @@ -1089,12 +1082,10 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags) } *portstr = '\0'; - da = kzalloc(sizeof(*da), gfp_flags); + da = nfs4_pnfs_ds_addr_alloc(gfp_flags); if (unlikely(!da)) goto out_free_buf; - INIT_LIST_HEAD(&da->da_node); - if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr, sizeof(da->da_addr))) { dprintk("%s: error parsing address %s\n", __func__, buf); @@ -1109,15 +1100,11 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags) case AF_INET: ((struct sockaddr_in *)&da->da_addr)->sin_port = port; da->da_addrlen = sizeof(struct sockaddr_in); - match_netid = "tcp"; - match_netid_len = 3; break; case AF_INET6: ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port; da->da_addrlen = sizeof(struct sockaddr_in6); - match_netid = "tcp6"; - match_netid_len = 4; startsep = "["; endsep = "]"; break; @@ -1128,12 +1115,15 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags) goto out_free_da; } - if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) { - dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n", - __func__, netid, match_netid); + da->da_transport = xprt_find_transport_ident(netid); + if (da->da_transport < 0) { + dprintk("%s: ERROR: unknown r_netid \"%s\"\n", + __func__, netid); goto out_free_da; } + da->da_netid = netid; + /* save human readable address */ len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7; da->da_remotestr = kzalloc(len, gfp_flags); @@ -1145,7 +1135,6 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags) dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr); kfree(buf); - kfree(netid); return da; out_free_da: diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 15c865cc837f..73ab7c59d3a7 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -499,26 +499,26 @@ nfs_proc_rmdir(struct inode *dir, const struct qstr *name) * sure it is syntactically correct; the entries itself are decoded * from nfs_readdir by calling the decode_entry function directly. */ -static int -nfs_proc_readdir(struct dentry *dentry, const struct cred *cred, - u64 cookie, struct page **pages, unsigned int count, bool plus) +static int nfs_proc_readdir(struct nfs_readdir_arg *nr_arg, + struct nfs_readdir_res *nr_res) { - struct inode *dir = d_inode(dentry); + struct inode *dir = d_inode(nr_arg->dentry); struct nfs_readdirargs arg = { .fh = NFS_FH(dir), - .cookie = cookie, - .count = count, - .pages = pages, + .cookie = nr_arg->cookie, + .count = nr_arg->page_len, + .pages = nr_arg->pages, }; struct rpc_message msg = { .rpc_proc = &nfs_procedures[NFSPROC_READDIR], .rpc_argp = &arg, - .rpc_cred = cred, + .rpc_cred = nr_arg->cred, }; int status; - dprintk("NFS call readdir %d\n", (unsigned int)cookie); + dprintk("NFS call readdir %llu\n", (unsigned long long)nr_arg->cookie); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + nr_res->verf[0] = nr_res->verf[1] = 0; nfs_invalidate_atime(dir); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 4034102010f0..c7a924580eec 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -86,9 +86,11 @@ const struct super_operations nfs_sops = { }; EXPORT_SYMBOL_GPL(nfs_sops); +#ifdef CONFIG_NFS_V4_2 static const struct nfs_ssc_client_ops nfs_ssc_clnt_ops_tbl = { .sco_sb_deactive = nfs_sb_deactive, }; +#endif #if IS_ENABLED(CONFIG_NFS_V4) static int __init register_nfs4_fs(void) @@ -111,15 +113,21 @@ static void unregister_nfs4_fs(void) } #endif +#ifdef CONFIG_NFS_V4_2 static void nfs_ssc_register_ops(void) { +#ifdef CONFIG_NFSD_V4 nfs_ssc_register(&nfs_ssc_clnt_ops_tbl); +#endif } static void nfs_ssc_unregister_ops(void) { +#ifdef CONFIG_NFSD_V4 nfs_ssc_unregister(&nfs_ssc_clnt_ops_tbl); +#endif } +#endif /* CONFIG_NFS_V4_2 */ static struct shrinker acl_shrinker = { .count_objects = nfs_access_cache_count, @@ -148,7 +156,9 @@ int __init register_nfs_fs(void) ret = register_shrinker(&acl_shrinker); if (ret < 0) goto error_3; +#ifdef CONFIG_NFS_V4_2 nfs_ssc_register_ops(); +#endif return 0; error_3: nfs_unregister_sysctl(); @@ -168,7 +178,9 @@ void __exit unregister_nfs_fs(void) unregister_shrinker(&acl_shrinker); nfs_unregister_sysctl(); unregister_nfs4_fs(); +#ifdef CONFIG_NFS_V4_2 nfs_ssc_unregister_ops(); +#endif unregister_filesystem(&nfs_fs_type); } |