diff options
Diffstat (limited to '')
49 files changed, 1176 insertions, 1086 deletions
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 1ead5bd740c2..14a72224b657 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -209,8 +209,8 @@ config NFS_DISABLE_UDP_SUPPORT config NFS_V4_2_READ_PLUS bool "NFS: Enable support for the NFSv4.2 READ_PLUS operation" depends on NFS_V4_2 - default y + default n help - Choose Y here to enable the use of READ_PLUS over NFS v4.2. READ_PLUS - attempts to improve read performance by compressing out sparse holes - in the file contents. + This is intended for developers only. The READ_PLUS operation has + been shown to have issues under specific conditions and should not + be used in production. diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index d0cccddb7d08..321af81c456e 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -980,14 +980,11 @@ out_invalidcred: } static int -nfs_callback_dispatch(struct svc_rqst *rqstp, __be32 *statp) +nfs_callback_dispatch(struct svc_rqst *rqstp) { const struct svc_procedure *procp = rqstp->rq_procinfo; - svcxdr_init_decode(rqstp); - svcxdr_init_encode(rqstp); - - *statp = procp->pc_func(rqstp); + *rqstp->rq_accept_statp = procp->pc_func(rqstp); return 1; } @@ -1072,7 +1069,8 @@ static const struct svc_procedure nfs4_callback_procedures1[] = { } }; -static unsigned int nfs4_callback_count1[ARRAY_SIZE(nfs4_callback_procedures1)]; +static DEFINE_PER_CPU_ALIGNED(unsigned long, + nfs4_callback_count1[ARRAY_SIZE(nfs4_callback_procedures1)]); const struct svc_version nfs4_callback_version1 = { .vs_vers = 1, .vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1), @@ -1084,7 +1082,8 @@ const struct svc_version nfs4_callback_version1 = { .vs_need_cong_ctrl = true, }; -static unsigned int nfs4_callback_count4[ARRAY_SIZE(nfs4_callback_procedures1)]; +static DEFINE_PER_CPU_ALIGNED(unsigned long, + nfs4_callback_count4[ARRAY_SIZE(nfs4_callback_procedures1)]); const struct svc_version nfs4_callback_version4 = { .vs_vers = 4, .vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1), diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index ea1ceffa1d3a..a41c3ee4549c 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -203,14 +203,14 @@ static void nfs_readdir_page_init_array(struct page *page, u64 last_cookie, { struct nfs_cache_array *array; - array = kmap_atomic(page); + array = kmap_local_page(page); array->change_attr = change_attr; array->last_cookie = last_cookie; array->size = 0; array->page_full = 0; array->page_is_eof = 0; array->cookies_are_ordered = 1; - kunmap_atomic(array); + kunmap_local(array); } /* @@ -221,11 +221,11 @@ static void nfs_readdir_clear_array(struct page *page) struct nfs_cache_array *array; unsigned int i; - array = kmap_atomic(page); + array = kmap_local_page(page); for (i = 0; i < array->size; i++) kfree(array->array[i].name); array->size = 0; - kunmap_atomic(array); + kunmap_local(array); } static void nfs_readdir_free_folio(struct folio *folio) @@ -371,14 +371,14 @@ static pgoff_t nfs_readdir_page_cookie_hash(u64 cookie) static bool nfs_readdir_page_validate(struct page *page, u64 last_cookie, u64 change_attr) { - struct nfs_cache_array *array = kmap_atomic(page); + struct nfs_cache_array *array = kmap_local_page(page); int ret = true; if (array->change_attr != change_attr) ret = false; if (nfs_readdir_array_index_cookie(array) != last_cookie) ret = false; - kunmap_atomic(array); + kunmap_local(array); return ret; } @@ -418,9 +418,9 @@ static u64 nfs_readdir_page_last_cookie(struct page *page) struct nfs_cache_array *array; u64 ret; - array = kmap_atomic(page); + array = kmap_local_page(page); ret = array->last_cookie; - kunmap_atomic(array); + kunmap_local(array); return ret; } @@ -429,9 +429,9 @@ static bool nfs_readdir_page_needs_filling(struct page *page) struct nfs_cache_array *array; bool ret; - array = kmap_atomic(page); + array = kmap_local_page(page); ret = !nfs_readdir_array_is_full(array); - kunmap_atomic(array); + kunmap_local(array); return ret; } @@ -439,9 +439,9 @@ static void nfs_readdir_page_set_eof(struct page *page) { struct nfs_cache_array *array; - array = kmap_atomic(page); + array = kmap_local_page(page); nfs_readdir_array_set_eof(array); - kunmap_atomic(array); + kunmap_local(array); } static struct page *nfs_readdir_page_get_next(struct address_space *mapping, @@ -568,14 +568,14 @@ static int nfs_readdir_search_array(struct nfs_readdir_descriptor *desc) struct nfs_cache_array *array; int status; - array = kmap_atomic(desc->page); + array = kmap_local_page(desc->page); if (desc->dir_cookie == 0) status = nfs_readdir_search_for_pos(array, desc); else status = nfs_readdir_search_for_cookie(array, desc); - kunmap_atomic(array); + kunmap_local(array); return status; } @@ -2296,7 +2296,7 @@ EXPORT_SYMBOL_GPL(nfs_instantiate); * that the operation succeeded on the server, but an error in the * reply path made it appear to have failed. */ -int nfs_create(struct user_namespace *mnt_userns, struct inode *dir, +int nfs_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { struct iattr attr; @@ -2325,7 +2325,7 @@ EXPORT_SYMBOL_GPL(nfs_create); * See comments for nfs_proc_create regarding failed operations. */ int -nfs_mknod(struct user_namespace *mnt_userns, struct inode *dir, +nfs_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct iattr attr; @@ -2352,7 +2352,7 @@ EXPORT_SYMBOL_GPL(nfs_mknod); /* * See comments for nfs_proc_create regarding failed operations. */ -int nfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir, +int nfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { struct iattr attr; @@ -2524,7 +2524,7 @@ EXPORT_SYMBOL_GPL(nfs_unlink); * now have a new file handle and can instantiate an in-core NFS inode * and move the raw page into its mapping. */ -int nfs_symlink(struct user_namespace *mnt_userns, struct inode *dir, +int nfs_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { struct page *page; @@ -2642,7 +2642,7 @@ nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data) * If these conditions are met, we can drop the dentries before doing * the rename. */ -int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir, +int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { @@ -2957,12 +2957,14 @@ static u64 nfs_access_login_time(const struct task_struct *task, const struct cred *cred) { const struct task_struct *parent; + const struct cred *pcred; u64 ret; rcu_read_lock(); for (;;) { parent = rcu_dereference(task->real_parent); - if (parent == task || cred_fscmp(parent->cred, cred) != 0) + pcred = rcu_dereference(parent->cred); + if (parent == task || cred_fscmp(pcred, cred) != 0) break; task = parent; } @@ -3023,6 +3025,7 @@ static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cre * but do it without locking. */ struct nfs_inode *nfsi = NFS_I(inode); + u64 login_time = nfs_access_login_time(current, cred); struct nfs_access_entry *cache; int err = -ECHILD; struct list_head *lh; @@ -3037,6 +3040,8 @@ static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cre cache = NULL; if (cache == NULL) goto out; + if ((s64)(login_time - cache->timestamp) > 0) + goto out; if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS)) goto out; *mask = cache->mask; @@ -3257,7 +3262,7 @@ static int nfs_execute_ok(struct inode *inode, int mask) return ret; } -int nfs_permission(struct user_namespace *mnt_userns, +int nfs_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { @@ -3308,7 +3313,7 @@ out_notsup: res = nfs_revalidate_inode(inode, NFS_INO_INVALID_MODE | NFS_INO_INVALID_OTHER); if (res == 0) - res = generic_permission(&init_user_ns, inode, mask); + res = generic_permission(&nop_mnt_idmap, inode, mask); goto out; } EXPORT_SYMBOL_GPL(nfs_permission); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 1707f46b1335..9a18c5a69ace 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -343,14 +343,12 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, struct nfs_page *req; unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); /* XXX do we need to do the eof zeroing found in async_filler? */ - req = nfs_create_request(dreq->ctx, pagevec[i], - pgbase, req_len); + req = nfs_page_create_from_page(dreq->ctx, pagevec[i], + pgbase, pos, req_len); if (IS_ERR(req)) { result = PTR_ERR(req); break; } - req->wb_index = pos >> PAGE_SHIFT; - req->wb_offset = pos & ~PAGE_MASK; if (!nfs_pageio_add_request(&desc, req)) { result = desc.pg_error; nfs_release_request(req); @@ -802,8 +800,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, struct nfs_page *req; unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); - req = nfs_create_request(dreq->ctx, pagevec[i], - pgbase, req_len); + req = nfs_page_create_from_page(dreq->ctx, pagevec[i], + pgbase, pos, req_len); if (IS_ERR(req)) { result = PTR_ERR(req); break; @@ -816,8 +814,6 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, } nfs_lock_request(req); - req->wb_index = pos >> PAGE_SHIFT; - req->wb_offset = pos & ~PAGE_MASK; if (!nfs_pageio_add_request(&desc, req)) { result = desc.pg_error; nfs_unlock_and_release_request(req); diff --git a/fs/nfs/export.c b/fs/nfs/export.c index 01596f2d0a1e..d6a6d1ebb8fd 100644 --- a/fs/nfs/export.c +++ b/fs/nfs/export.c @@ -42,7 +42,7 @@ nfs_encode_fh(struct inode *inode, __u32 *p, int *max_len, struct inode *parent) dprintk("%s: max fh len %d inode %p parent %p", __func__, *max_len, inode, parent); - if (*max_len < len || IS_AUTOMOUNT(inode)) { + if (*max_len < len) { dprintk("%s: fh len %d too small, required %d\n", __func__, *max_len, len); *max_len = len; @@ -145,17 +145,10 @@ out: return parent; } -static u64 nfs_fetch_iversion(struct inode *inode) -{ - nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE); - return inode_peek_iversion_raw(inode); -} - const struct export_operations nfs_export_ops = { .encode_fh = nfs_encode_fh, .fh_to_dentry = nfs_fh_to_dentry, .get_parent = nfs_get_parent, - .fetch_iversion = nfs_fetch_iversion, .flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK| EXPORT_OP_CLOSE_BEFORE_UNLINK|EXPORT_OP_REMOTE_FS| EXPORT_OP_NOATOMIC_ATTR, diff --git a/fs/nfs/file.c b/fs/nfs/file.c index d8ec889a4b3f..893625eacab9 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -31,6 +31,7 @@ #include <linux/swap.h> #include <linux/uaccess.h> +#include <linux/filelock.h> #include "delegation.h" #include "internal.h" @@ -276,27 +277,28 @@ EXPORT_SYMBOL_GPL(nfs_file_fsync); * and that the new data won't completely replace the old data in * that range of the file. */ -static bool nfs_full_page_write(struct page *page, loff_t pos, unsigned int len) +static bool nfs_folio_is_full_write(struct folio *folio, loff_t pos, + unsigned int len) { - unsigned int pglen = nfs_page_length(page); - unsigned int offset = pos & (PAGE_SIZE - 1); + unsigned int pglen = nfs_folio_length(folio); + unsigned int offset = offset_in_folio(folio, pos); unsigned int end = offset + len; return !pglen || (end >= pglen && !offset); } -static bool nfs_want_read_modify_write(struct file *file, struct page *page, - loff_t pos, unsigned int len) +static bool nfs_want_read_modify_write(struct file *file, struct folio *folio, + loff_t pos, unsigned int len) { /* * Up-to-date pages, those with ongoing or full-page write * don't need read/modify/write */ - if (PageUptodate(page) || PagePrivate(page) || - nfs_full_page_write(page, pos, len)) + if (folio_test_uptodate(folio) || folio_test_private(folio) || + nfs_folio_is_full_write(folio, pos, len)) return false; - if (pnfs_ld_read_whole_page(file->f_mapping->host)) + if (pnfs_ld_read_whole_page(file_inode(file))) return true; /* Open for reading too? */ if (file->f_mode & FMODE_READ) @@ -304,6 +306,15 @@ static bool nfs_want_read_modify_write(struct file *file, struct page *page, return false; } +static struct folio * +nfs_folio_grab_cache_write_begin(struct address_space *mapping, pgoff_t index) +{ + unsigned fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE; + + return __filemap_get_folio(mapping, index, fgp_flags, + mapping_gfp_mask(mapping)); +} + /* * This does the "real" work of the write. We must allocate and lock the * page to be sent back to the generic routine, which then copies the @@ -313,32 +324,31 @@ static bool nfs_want_read_modify_write(struct file *file, struct page *page, * increment the page use counts until he is done with the page. */ static int nfs_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, - struct page **pagep, void **fsdata) + loff_t pos, unsigned len, struct page **pagep, + void **fsdata) { - int ret; - pgoff_t index = pos >> PAGE_SHIFT; - struct page *page; + struct folio *folio; int once_thru = 0; + int ret; dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%lu), %u@%lld)\n", file, mapping->host->i_ino, len, (long long) pos); start: - page = grab_cache_page_write_begin(mapping, index); - if (!page) + folio = nfs_folio_grab_cache_write_begin(mapping, pos >> PAGE_SHIFT); + if (!folio) return -ENOMEM; - *pagep = page; + *pagep = &folio->page; - ret = nfs_flush_incompatible(file, page); + ret = nfs_flush_incompatible(file, folio); if (ret) { - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); } else if (!once_thru && - nfs_want_read_modify_write(file, page, pos, len)) { + nfs_want_read_modify_write(file, folio, pos, len)) { once_thru = 1; - ret = nfs_read_folio(file, page_folio(page)); - put_page(page); + ret = nfs_read_folio(file, folio); + folio_put(folio); if (!ret) goto start; } @@ -346,11 +356,12 @@ start: } static int nfs_write_end(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { - unsigned offset = pos & (PAGE_SIZE - 1); struct nfs_open_context *ctx = nfs_file_open_context(file); + struct folio *folio = page_folio(page); + unsigned offset = offset_in_folio(folio, pos); int status; dfprintk(PAGECACHE, "NFS: write_end(%pD2(%lu), %u@%lld)\n", @@ -360,26 +371,26 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, * Zero any uninitialised parts of the page, and then mark the page * as up to date if it turns out that we're extending the file. */ - if (!PageUptodate(page)) { - unsigned pglen = nfs_page_length(page); + if (!folio_test_uptodate(folio)) { + size_t fsize = folio_size(folio); + unsigned pglen = nfs_folio_length(folio); unsigned end = offset + copied; if (pglen == 0) { - zero_user_segments(page, 0, offset, - end, PAGE_SIZE); - SetPageUptodate(page); + folio_zero_segments(folio, 0, offset, end, fsize); + folio_mark_uptodate(folio); } else if (end >= pglen) { - zero_user_segment(page, end, PAGE_SIZE); + folio_zero_segment(folio, end, fsize); if (offset == 0) - SetPageUptodate(page); + folio_mark_uptodate(folio); } else - zero_user_segment(page, pglen, PAGE_SIZE); + folio_zero_segment(folio, pglen, fsize); } - status = nfs_updatepage(file, page, offset, copied); + status = nfs_update_folio(file, folio, offset, copied); - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); if (status < 0) return status; @@ -401,14 +412,16 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, static void nfs_invalidate_folio(struct folio *folio, size_t offset, size_t length) { + struct inode *inode = folio_file_mapping(folio)->host; dfprintk(PAGECACHE, "NFS: invalidate_folio(%lu, %zu, %zu)\n", folio->index, offset, length); if (offset != 0 || length < folio_size(folio)) return; /* Cancel any unstarted writes on this page */ - nfs_wb_folio_cancel(folio->mapping->host, folio); + nfs_wb_folio_cancel(inode, folio); folio_wait_fscache(folio); + trace_nfs_invalidate_folio(inode, folio); } /* @@ -422,8 +435,13 @@ static bool nfs_release_folio(struct folio *folio, gfp_t gfp) dfprintk(PAGECACHE, "NFS: release_folio(%p)\n", folio); /* If the private flag is set, then the folio is not freeable */ - if (folio_test_private(folio)) - return false; + if (folio_test_private(folio)) { + if ((current_gfp_context(gfp) & GFP_KERNEL) != GFP_KERNEL || + current_is_kswapd()) + return false; + if (nfs_wb_folio(folio_file_mapping(folio)->host, folio) < 0) + return false; + } return nfs_fscache_release_folio(folio, gfp); } @@ -464,12 +482,15 @@ static void nfs_check_dirty_writeback(struct folio *folio, static int nfs_launder_folio(struct folio *folio) { struct inode *inode = folio->mapping->host; + int ret; dfprintk(PAGECACHE, "NFS: launder_folio(%ld, %llu)\n", inode->i_ino, folio_pos(folio)); folio_wait_fscache(folio); - return nfs_wb_page(inode, &folio->page); + ret = nfs_wb_folio(inode, folio); + trace_nfs_launder_folio_done(inode, folio, ret); + return ret; } static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file, @@ -546,22 +567,22 @@ const struct address_space_operations nfs_file_aops = { */ static vm_fault_t nfs_vm_page_mkwrite(struct vm_fault *vmf) { - struct page *page = vmf->page; struct file *filp = vmf->vma->vm_file; struct inode *inode = file_inode(filp); unsigned pagelen; vm_fault_t ret = VM_FAULT_NOPAGE; struct address_space *mapping; + struct folio *folio = page_folio(vmf->page); dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%pD2(%lu), offset %lld)\n", - filp, filp->f_mapping->host->i_ino, - (long long)page_offset(page)); + filp, filp->f_mapping->host->i_ino, + (long long)folio_file_pos(folio)); sb_start_pagefault(inode->i_sb); /* make sure the cache has finished storing the page */ - if (PageFsCache(page) && - wait_on_page_fscache_killable(vmf->page) < 0) { + if (folio_test_fscache(folio) && + folio_wait_fscache_killable(folio) < 0) { ret = VM_FAULT_RETRY; goto out; } @@ -570,25 +591,25 @@ static vm_fault_t nfs_vm_page_mkwrite(struct vm_fault *vmf) nfs_wait_bit_killable, TASK_KILLABLE|TASK_FREEZABLE_UNSAFE); - lock_page(page); - mapping = page_file_mapping(page); + folio_lock(folio); + mapping = folio_file_mapping(folio); if (mapping != inode->i_mapping) goto out_unlock; - wait_on_page_writeback(page); + folio_wait_writeback(folio); - pagelen = nfs_page_length(page); + pagelen = nfs_folio_length(folio); if (pagelen == 0) goto out_unlock; ret = VM_FAULT_LOCKED; - if (nfs_flush_incompatible(filp, page) == 0 && - nfs_updatepage(filp, page, 0, pagelen) == 0) + if (nfs_flush_incompatible(filp, folio) == 0 && + nfs_update_folio(filp, folio, 0, pagelen) == 0) goto out; ret = VM_FAULT_SIGBUS; out_unlock: - unlock_page(page); + folio_unlock(folio); out: sb_end_pagefault(inode->i_sb); return ret; diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index ad34a33b0737..ce8f8934bca5 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -783,6 +783,12 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, return &fl->generic_hdr; } +static bool +filelayout_lseg_is_striped(const struct nfs4_filelayout_segment *flseg) +{ + return flseg->num_fh > 1; +} + /* * filelayout_pg_test(). Called by nfs_can_coalesce_requests() * @@ -803,6 +809,8 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, size = pnfs_generic_pg_test(pgio, prev, req); if (!size) return 0; + else if (!filelayout_lseg_is_striped(FILELAYOUT_LSEG(pgio->pg_lseg))) + return size; /* see if req and prev are in the same stripe */ if (prev) { @@ -854,6 +862,8 @@ fl_pnfs_update_layout(struct inode *ino, status = filelayout_check_deviceid(lo, fl, gfp_flags); if (status) { + pnfs_error_mark_layout_for_return(ino, lseg); + pnfs_set_lo_fail(lseg); pnfs_put_lseg(lseg); lseg = NULL; } diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index e731c00a9fcb..ea5f2976dfab 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -245,14 +245,12 @@ static int fscache_fallback_read_page(struct inode *inode, struct page *page) struct netfs_cache_resources cres; struct fscache_cookie *cookie = nfs_i_fscache(inode); struct iov_iter iter; - struct bio_vec bvec[1]; + struct bio_vec bvec; int ret; memset(&cres, 0, sizeof(cres)); - bvec[0].bv_page = page; - bvec[0].bv_offset = 0; - bvec[0].bv_len = PAGE_SIZE; - iov_iter_bvec(&iter, ITER_DEST, bvec, ARRAY_SIZE(bvec), PAGE_SIZE); + bvec_set_page(&bvec, page, PAGE_SIZE, 0); + iov_iter_bvec(&iter, ITER_DEST, &bvec, 1, PAGE_SIZE); ret = fscache_begin_read_operation(&cres, cookie); if (ret < 0) @@ -273,16 +271,14 @@ static int fscache_fallback_write_page(struct inode *inode, struct page *page, struct netfs_cache_resources cres; struct fscache_cookie *cookie = nfs_i_fscache(inode); struct iov_iter iter; - struct bio_vec bvec[1]; + struct bio_vec bvec; loff_t start = page_offset(page); size_t len = PAGE_SIZE; int ret; memset(&cres, 0, sizeof(cres)); - bvec[0].bv_page = page; - bvec[0].bv_offset = 0; - bvec[0].bv_len = PAGE_SIZE; - iov_iter_bvec(&iter, ITER_SOURCE, bvec, ARRAY_SIZE(bvec), PAGE_SIZE); + bvec_set_page(&bvec, page, PAGE_SIZE, 0); + iov_iter_bvec(&iter, ITER_SOURCE, &bvec, 1, PAGE_SIZE); ret = fscache_begin_write_operation(&cres, cookie); if (ret < 0) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e98ee7599eeb..222a28320e1c 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -606,7 +606,7 @@ EXPORT_SYMBOL_GPL(nfs_fhget); #define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE|ATTR_OPEN) int -nfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); @@ -825,10 +825,12 @@ static u32 nfs_get_valid_attrmask(struct inode *inode) reply_mask |= STATX_UID | STATX_GID; if (!(cache_validity & NFS_INO_INVALID_BLOCKS)) reply_mask |= STATX_BLOCKS; + if (!(cache_validity & NFS_INO_INVALID_CHANGE)) + reply_mask |= STATX_CHANGE_COOKIE; return reply_mask; } -int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path, +int nfs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); @@ -843,7 +845,8 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path, request_mask &= STATX_TYPE | STATX_MODE | STATX_NLINK | STATX_UID | STATX_GID | STATX_ATIME | STATX_MTIME | STATX_CTIME | - STATX_INO | STATX_SIZE | STATX_BLOCKS; + STATX_INO | STATX_SIZE | STATX_BLOCKS | STATX_BTIME | + STATX_CHANGE_COOKIE; if ((query_flags & AT_STATX_DONT_SYNC) && !force_sync) { if (readdirplus_enabled) @@ -851,8 +854,8 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path, goto out_no_revalidate; } - /* Flush out writes to the server in order to update c/mtime. */ - if ((request_mask & (STATX_CTIME | STATX_MTIME)) && + /* Flush out writes to the server in order to update c/mtime/version. */ + if ((request_mask & (STATX_CTIME | STATX_MTIME | STATX_CHANGE_COOKIE)) && S_ISREG(inode->i_mode)) filemap_write_and_wait(inode->i_mapping); @@ -872,7 +875,8 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path, /* Is the user requesting attributes that might need revalidation? */ if (!(request_mask & (STATX_MODE|STATX_NLINK|STATX_ATIME|STATX_CTIME| STATX_MTIME|STATX_UID|STATX_GID| - STATX_SIZE|STATX_BLOCKS))) + STATX_SIZE|STATX_BLOCKS| + STATX_CHANGE_COOKIE))) goto out_no_revalidate; /* Check whether the cached attributes are stale */ @@ -908,8 +912,12 @@ out_no_revalidate: /* Only return attributes that were revalidated. */ stat->result_mask = nfs_get_valid_attrmask(inode) | request_mask; - generic_fillattr(&init_user_ns, inode, stat); + generic_fillattr(&nop_mnt_idmap, inode, stat); stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); + stat->change_cookie = inode_peek_iversion_raw(inode); + stat->attributes_mask |= STATX_ATTR_CHANGE_MONOTONIC; + if (server->change_attr_type != NFS4_CHANGE_TYPE_IS_UNDEFINED) + stat->attributes |= STATX_ATTR_CHANGE_MONOTONIC; if (S_ISDIR(inode->i_mode)) stat->blksize = NFS_SERVER(inode)->dtsize; out: diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index ae7d4a8c728c..2a65fe2a63ab 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -384,18 +384,18 @@ extern unsigned long nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc); struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int); void nfs_d_prune_case_insensitive_aliases(struct inode *inode); -int nfs_create(struct user_namespace *, struct inode *, struct dentry *, +int nfs_create(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, bool); -int nfs_mkdir(struct user_namespace *, struct inode *, struct dentry *, +int nfs_mkdir(struct mnt_idmap *, struct inode *, struct dentry *, umode_t); int nfs_rmdir(struct inode *, struct dentry *); int nfs_unlink(struct inode *, struct dentry *); -int nfs_symlink(struct user_namespace *, struct inode *, struct dentry *, +int nfs_symlink(struct mnt_idmap *, struct inode *, struct dentry *, const char *); int nfs_link(struct dentry *, struct inode *, struct dentry *); -int nfs_mknod(struct user_namespace *, struct inode *, struct dentry *, umode_t, +int nfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, dev_t); -int nfs_rename(struct user_namespace *, struct inode *, struct dentry *, +int nfs_rename(struct mnt_idmap *, struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); #ifdef CONFIG_NFS_V4_2 @@ -760,17 +760,18 @@ void nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize) * Record the page as unstable (an extra writeback period) and mark its * inode as dirty. */ -static inline -void nfs_mark_page_unstable(struct page *page, struct nfs_commit_info *cinfo) +static inline void nfs_folio_mark_unstable(struct folio *folio, + struct nfs_commit_info *cinfo) { - if (!cinfo->dreq) { - struct inode *inode = page_file_mapping(page)->host; + if (folio && !cinfo->dreq) { + struct inode *inode = folio_file_mapping(folio)->host; + long nr = folio_nr_pages(folio); /* This page is really still in write-back - just that the * writeback is happening on the server now. */ - inc_node_page_state(page, NR_WRITEBACK); - inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK); + node_stat_mod_folio(folio, NR_WRITEBACK, nr); + wb_stat_mod(&inode_to_bdi(inode)->wb, WB_WRITEBACK, nr); __mark_inode_dirty(inode, I_DIRTY_DATASYNC); } } @@ -795,6 +796,24 @@ unsigned int nfs_page_length(struct page *page) } /* + * Determine the number of bytes of data the page contains + */ +static inline size_t nfs_folio_length(struct folio *folio) +{ + loff_t i_size = i_size_read(folio_file_mapping(folio)->host); + + if (i_size > 0) { + pgoff_t index = folio_index(folio) >> folio_order(folio); + pgoff_t end_index = (i_size - 1) >> folio_shift(folio); + if (index < end_index) + return folio_size(folio); + if (index == end_index) + return offset_in_folio(folio, i_size - 1) + 1; + } + return 0; +} + +/* * Convert a umode to a dirent->d_type */ static inline @@ -807,11 +826,10 @@ unsigned char nfs_umode_to_dtype(umode_t mode) * Determine the number of pages in an array of length 'len' and * with a base offset of 'base' */ -static inline -unsigned int nfs_page_array_len(unsigned int base, size_t len) +static inline unsigned int nfs_page_array_len(unsigned int base, size_t len) { - return ((unsigned long)len + (unsigned long)base + - PAGE_SIZE - 1) >> PAGE_SHIFT; + return ((unsigned long)len + (unsigned long)base + PAGE_SIZE - 1) >> + PAGE_SHIFT; } /* diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index b0ef7e7ddb30..19d51ebf842c 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -208,23 +208,23 @@ out_fc: } static int -nfs_namespace_getattr(struct user_namespace *mnt_userns, +nfs_namespace_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { if (NFS_FH(d_inode(path->dentry))->size != 0) - return nfs_getattr(mnt_userns, path, stat, request_mask, + return nfs_getattr(idmap, path, stat, request_mask, query_flags); - generic_fillattr(&init_user_ns, d_inode(path->dentry), stat); + generic_fillattr(&nop_mnt_idmap, d_inode(path->dentry), stat); return 0; } static int -nfs_namespace_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, +nfs_namespace_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { if (NFS_FH(d_inode(dentry))->size != 0) - return nfs_setattr(mnt_userns, dentry, attr); + return nfs_setattr(idmap, dentry, attr); return -EACCES; } diff --git a/fs/nfs/nfs3_fs.h b/fs/nfs/nfs3_fs.h index df9ca56db347..4fa37dc038b5 100644 --- a/fs/nfs/nfs3_fs.h +++ b/fs/nfs/nfs3_fs.h @@ -12,7 +12,7 @@ */ #ifdef CONFIG_NFS_V3_ACL extern struct posix_acl *nfs3_get_acl(struct inode *inode, int type, bool rcu); -extern int nfs3_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +extern int nfs3_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type); extern int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, struct posix_acl *dfacl); diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 74d11e3c4205..1247f544a440 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -255,7 +255,7 @@ int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, } -int nfs3_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +int nfs3_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type) { struct posix_acl *orig = acl, *dfacl = NULL, *alloc; diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index ecb428512fe1..93e306bf4430 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -460,7 +460,8 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, if (err >= 0) break; - if (err == -ENOTSUPP && + if ((err == -ENOTSUPP || + err == -NFS4ERR_OFFLOAD_DENIED) && nfs42_files_from_same_server(src, dst)) { err = -EOPNOTSUPP; break; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 5edd1704f735..4c9f8bd866ab 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -23,6 +23,7 @@ #define NFS4_MAX_LOOP_ON_RECOVER (10) #include <linux/seqlock.h> +#include <linux/filelock.h> struct idmap; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 40d749f29ed3..22a93ae46cd7 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7692,7 +7692,7 @@ nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp) #define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" static int nfs4_xattr_set_nfs4_acl(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *key, const void *buf, size_t buflen, int flags) @@ -7716,7 +7716,7 @@ static bool nfs4_xattr_list_nfs4_acl(struct dentry *dentry) #define XATTR_NAME_NFSV4_DACL "system.nfs4_dacl" static int nfs4_xattr_set_nfs4_dacl(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *key, const void *buf, size_t buflen, int flags) @@ -7739,7 +7739,7 @@ static bool nfs4_xattr_list_nfs4_dacl(struct dentry *dentry) #define XATTR_NAME_NFSV4_SACL "system.nfs4_sacl" static int nfs4_xattr_set_nfs4_sacl(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *key, const void *buf, size_t buflen, int flags) @@ -7764,7 +7764,7 @@ static bool nfs4_xattr_list_nfs4_sacl(struct dentry *dentry) #ifdef CONFIG_NFS_V4_SECURITY_LABEL static int nfs4_xattr_set_nfs4_label(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *key, const void *buf, size_t buflen, int flags) @@ -7815,7 +7815,7 @@ nfs4_listxattr_nfs4_label(struct inode *inode, char *list, size_t list_len) #ifdef CONFIG_NFS_V4_2 static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *key, const void *buf, size_t buflen, int flags) @@ -10604,7 +10604,9 @@ static void nfs4_disable_swap(struct inode *inode) /* The state manager thread will now exit once it is * woken. */ - wake_up_var(&NFS_SERVER(inode)->nfs_client->cl_state); + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + + nfs4_schedule_state_manager(clp); } static const struct inode_operations nfs4_dir_inode_operations = { diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 214bc56f92d2..d27919d7241d 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -292,32 +292,34 @@ TRACE_DEFINE_ENUM(NFS4CLNT_MOVED); TRACE_DEFINE_ENUM(NFS4CLNT_LEASE_MOVED); TRACE_DEFINE_ENUM(NFS4CLNT_DELEGATION_EXPIRED); TRACE_DEFINE_ENUM(NFS4CLNT_RUN_MANAGER); +TRACE_DEFINE_ENUM(NFS4CLNT_MANAGER_AVAILABLE); TRACE_DEFINE_ENUM(NFS4CLNT_RECALL_RUNNING); TRACE_DEFINE_ENUM(NFS4CLNT_RECALL_ANY_LAYOUT_READ); TRACE_DEFINE_ENUM(NFS4CLNT_RECALL_ANY_LAYOUT_RW); +TRACE_DEFINE_ENUM(NFS4CLNT_DELEGRETURN_DELAYED); #define show_nfs4_clp_state(state) \ __print_flags(state, "|", \ - { NFS4CLNT_MANAGER_RUNNING, "MANAGER_RUNNING" }, \ - { NFS4CLNT_CHECK_LEASE, "CHECK_LEASE" }, \ - { NFS4CLNT_LEASE_EXPIRED, "LEASE_EXPIRED" }, \ - { NFS4CLNT_RECLAIM_REBOOT, "RECLAIM_REBOOT" }, \ - { NFS4CLNT_RECLAIM_NOGRACE, "RECLAIM_NOGRACE" }, \ - { NFS4CLNT_DELEGRETURN, "DELEGRETURN" }, \ - { NFS4CLNT_SESSION_RESET, "SESSION_RESET" }, \ - { NFS4CLNT_LEASE_CONFIRM, "LEASE_CONFIRM" }, \ - { NFS4CLNT_SERVER_SCOPE_MISMATCH, \ - "SERVER_SCOPE_MISMATCH" }, \ - { NFS4CLNT_PURGE_STATE, "PURGE_STATE" }, \ - { NFS4CLNT_BIND_CONN_TO_SESSION, \ - "BIND_CONN_TO_SESSION" }, \ - { NFS4CLNT_MOVED, "MOVED" }, \ - { NFS4CLNT_LEASE_MOVED, "LEASE_MOVED" }, \ - { NFS4CLNT_DELEGATION_EXPIRED, "DELEGATION_EXPIRED" }, \ - { NFS4CLNT_RUN_MANAGER, "RUN_MANAGER" }, \ - { NFS4CLNT_RECALL_RUNNING, "RECALL_RUNNING" }, \ - { NFS4CLNT_RECALL_ANY_LAYOUT_READ, "RECALL_ANY_LAYOUT_READ" }, \ - { NFS4CLNT_RECALL_ANY_LAYOUT_RW, "RECALL_ANY_LAYOUT_RW" }) + { BIT(NFS4CLNT_MANAGER_RUNNING), "MANAGER_RUNNING" }, \ + { BIT(NFS4CLNT_CHECK_LEASE), "CHECK_LEASE" }, \ + { BIT(NFS4CLNT_LEASE_EXPIRED), "LEASE_EXPIRED" }, \ + { BIT(NFS4CLNT_RECLAIM_REBOOT), "RECLAIM_REBOOT" }, \ + { BIT(NFS4CLNT_RECLAIM_NOGRACE), "RECLAIM_NOGRACE" }, \ + { BIT(NFS4CLNT_DELEGRETURN), "DELEGRETURN" }, \ + { BIT(NFS4CLNT_SESSION_RESET), "SESSION_RESET" }, \ + { BIT(NFS4CLNT_LEASE_CONFIRM), "LEASE_CONFIRM" }, \ + { BIT(NFS4CLNT_SERVER_SCOPE_MISMATCH), "SERVER_SCOPE_MISMATCH" }, \ + { BIT(NFS4CLNT_PURGE_STATE), "PURGE_STATE" }, \ + { BIT(NFS4CLNT_BIND_CONN_TO_SESSION), "BIND_CONN_TO_SESSION" }, \ + { BIT(NFS4CLNT_MOVED), "MOVED" }, \ + { BIT(NFS4CLNT_LEASE_MOVED), "LEASE_MOVED" }, \ + { BIT(NFS4CLNT_DELEGATION_EXPIRED), "DELEGATION_EXPIRED" }, \ + { BIT(NFS4CLNT_RUN_MANAGER), "RUN_MANAGER" }, \ + { BIT(NFS4CLNT_MANAGER_AVAILABLE), "MANAGER_AVAILABLE" }, \ + { BIT(NFS4CLNT_RECALL_RUNNING), "RECALL_RUNNING" }, \ + { BIT(NFS4CLNT_RECALL_ANY_LAYOUT_READ), "RECALL_ANY_LAYOUT_READ" }, \ + { BIT(NFS4CLNT_RECALL_ANY_LAYOUT_RW), "RECALL_ANY_LAYOUT_RW" }, \ + { BIT(NFS4CLNT_DELEGRETURN_DELAYED), "DELERETURN_DELAYED" }) TRACE_EVENT(nfs4_state_mgr, TP_PROTO( diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 642f6921852f..a778713343df 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -152,8 +152,6 @@ DEFINE_NFS_INODE_EVENT(nfs_getattr_enter); DEFINE_NFS_INODE_EVENT_DONE(nfs_getattr_exit); DEFINE_NFS_INODE_EVENT(nfs_setattr_enter); DEFINE_NFS_INODE_EVENT_DONE(nfs_setattr_exit); -DEFINE_NFS_INODE_EVENT(nfs_writeback_page_enter); -DEFINE_NFS_INODE_EVENT_DONE(nfs_writeback_page_exit); DEFINE_NFS_INODE_EVENT(nfs_writeback_inode_enter); DEFINE_NFS_INODE_EVENT_DONE(nfs_writeback_inode_exit); DEFINE_NFS_INODE_EVENT(nfs_fsync_enter); @@ -933,13 +931,13 @@ TRACE_EVENT(nfs_sillyrename_unlink, ) ); -TRACE_EVENT(nfs_aop_readpage, +DECLARE_EVENT_CLASS(nfs_folio_event, TP_PROTO( const struct inode *inode, - struct page *page + struct folio *folio ), - TP_ARGS(inode, page), + TP_ARGS(inode, folio), TP_STRUCT__entry( __field(dev_t, dev) @@ -947,6 +945,7 @@ TRACE_EVENT(nfs_aop_readpage, __field(u64, fileid) __field(u64, version) __field(loff_t, offset) + __field(u32, count) ), TP_fast_assign( @@ -956,26 +955,36 @@ TRACE_EVENT(nfs_aop_readpage, __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->version = inode_peek_iversion_raw(inode); - __entry->offset = page_index(page) << PAGE_SHIFT; + __entry->offset = folio_file_pos(folio); + __entry->count = nfs_folio_length(folio); ), TP_printk( - "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu offset=%lld", + "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu " + "offset=%lld count=%u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->version, - __entry->offset + __entry->offset, __entry->count ) ); -TRACE_EVENT(nfs_aop_readpage_done, +#define DEFINE_NFS_FOLIO_EVENT(name) \ + DEFINE_EVENT(nfs_folio_event, name, \ + TP_PROTO( \ + const struct inode *inode, \ + struct folio *folio \ + ), \ + TP_ARGS(inode, folio)) + +DECLARE_EVENT_CLASS(nfs_folio_event_done, TP_PROTO( const struct inode *inode, - struct page *page, + struct folio *folio, int ret ), - TP_ARGS(inode, page, ret), + TP_ARGS(inode, folio, ret), TP_STRUCT__entry( __field(dev_t, dev) @@ -984,6 +993,7 @@ TRACE_EVENT(nfs_aop_readpage_done, __field(u64, fileid) __field(u64, version) __field(loff_t, offset) + __field(u32, count) ), TP_fast_assign( @@ -993,19 +1003,39 @@ TRACE_EVENT(nfs_aop_readpage_done, __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->version = inode_peek_iversion_raw(inode); - __entry->offset = page_index(page) << PAGE_SHIFT; + __entry->offset = folio_file_pos(folio); + __entry->count = nfs_folio_length(folio); __entry->ret = ret; ), TP_printk( - "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu offset=%lld ret=%d", + "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu " + "offset=%lld count=%u ret=%d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->version, - __entry->offset, __entry->ret + __entry->offset, __entry->count, __entry->ret ) ); +#define DEFINE_NFS_FOLIO_EVENT_DONE(name) \ + DEFINE_EVENT(nfs_folio_event_done, name, \ + TP_PROTO( \ + const struct inode *inode, \ + struct folio *folio, \ + int ret \ + ), \ + TP_ARGS(inode, folio, ret)) + +DEFINE_NFS_FOLIO_EVENT(nfs_aop_readpage); +DEFINE_NFS_FOLIO_EVENT_DONE(nfs_aop_readpage_done); + +DEFINE_NFS_FOLIO_EVENT(nfs_writeback_folio); +DEFINE_NFS_FOLIO_EVENT_DONE(nfs_writeback_folio_done); + +DEFINE_NFS_FOLIO_EVENT(nfs_invalidate_folio); +DEFINE_NFS_FOLIO_EVENT_DONE(nfs_launder_folio_done); + TRACE_EVENT(nfs_aop_readahead, TP_PROTO( const struct inode *inode, diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 16be6dae524f..64fa8de199de 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -21,6 +21,7 @@ #include <linux/nfs_page.h> #include <linux/nfs_mount.h> #include <linux/export.h> +#include <linux/filelock.h> #include "internal.h" #include "pnfs.h" @@ -31,6 +32,42 @@ static struct kmem_cache *nfs_page_cachep; static const struct rpc_call_ops nfs_pgio_common_ops; +struct nfs_page_iter_page { + const struct nfs_page *req; + size_t count; +}; + +static void nfs_page_iter_page_init(struct nfs_page_iter_page *i, + const struct nfs_page *req) +{ + i->req = req; + i->count = 0; +} + +static void nfs_page_iter_page_advance(struct nfs_page_iter_page *i, size_t sz) +{ + const struct nfs_page *req = i->req; + size_t tmp = i->count + sz; + + i->count = (tmp < req->wb_bytes) ? tmp : req->wb_bytes; +} + +static struct page *nfs_page_iter_page_get(struct nfs_page_iter_page *i) +{ + const struct nfs_page *req = i->req; + struct page *page; + + if (i->count != req->wb_bytes) { + size_t base = i->count + req->wb_pgbase; + size_t len = PAGE_SIZE - offset_in_page(base); + + page = nfs_page_to_page(req, base); + nfs_page_iter_page_advance(i, len); + return page; + } + return NULL; +} + static struct nfs_pgio_mirror * nfs_pgio_get_mirror(struct nfs_pageio_descriptor *desc, u32 idx) { @@ -390,7 +427,7 @@ nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev) * has extra ref from the write/commit path to handle handoff * between write and commit lists. */ if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) { - inode = page_file_mapping(req->wb_page)->host; + inode = nfs_page_to_inode(req); set_bit(PG_INODE_REF, &req->wb_flags); kref_get(&req->wb_kref); atomic_long_inc(&NFS_I(inode)->nrequests); @@ -430,10 +467,9 @@ out: nfs_release_request(head); } -static struct nfs_page * -__nfs_create_request(struct nfs_lock_context *l_ctx, struct page *page, - unsigned int pgbase, unsigned int offset, - unsigned int count) +static struct nfs_page *nfs_page_create(struct nfs_lock_context *l_ctx, + unsigned int pgbase, pgoff_t index, + unsigned int offset, unsigned int count) { struct nfs_page *req; struct nfs_open_context *ctx = l_ctx->open_context; @@ -452,42 +488,90 @@ __nfs_create_request(struct nfs_lock_context *l_ctx, struct page *page, /* Initialize the request struct. Initially, we assume a * long write-back delay. This will be adjusted in * update_nfs_request below if the region is not locked. */ - req->wb_page = page; - if (page) { - req->wb_index = page_index(page); - get_page(page); - } - req->wb_offset = offset; - req->wb_pgbase = pgbase; - req->wb_bytes = count; + req->wb_pgbase = pgbase; + req->wb_index = index; + req->wb_offset = offset; + req->wb_bytes = count; kref_init(&req->wb_kref); req->wb_nio = 0; return req; } +static void nfs_page_assign_folio(struct nfs_page *req, struct folio *folio) +{ + if (folio != NULL) { + req->wb_folio = folio; + folio_get(folio); + set_bit(PG_FOLIO, &req->wb_flags); + } +} + +static void nfs_page_assign_page(struct nfs_page *req, struct page *page) +{ + if (page != NULL) { + req->wb_page = page; + get_page(page); + } +} + /** - * nfs_create_request - Create an NFS read/write request. + * nfs_page_create_from_page - Create an NFS read/write request. * @ctx: open context to use * @page: page to write - * @offset: starting offset within the page for the write + * @pgbase: starting offset within the page for the write + * @offset: file offset for the write * @count: number of bytes to read/write * * The page must be locked by the caller. This makes sure we never * create two different requests for the same page. * User should ensure it is safe to sleep in this function. */ -struct nfs_page * -nfs_create_request(struct nfs_open_context *ctx, struct page *page, - unsigned int offset, unsigned int count) +struct nfs_page *nfs_page_create_from_page(struct nfs_open_context *ctx, + struct page *page, + unsigned int pgbase, loff_t offset, + unsigned int count) +{ + struct nfs_lock_context *l_ctx = nfs_get_lock_context(ctx); + struct nfs_page *ret; + + if (IS_ERR(l_ctx)) + return ERR_CAST(l_ctx); + ret = nfs_page_create(l_ctx, pgbase, offset >> PAGE_SHIFT, + offset_in_page(offset), count); + if (!IS_ERR(ret)) { + nfs_page_assign_page(ret, page); + nfs_page_group_init(ret, NULL); + } + nfs_put_lock_context(l_ctx); + return ret; +} + +/** + * nfs_page_create_from_folio - Create an NFS read/write request. + * @ctx: open context to use + * @folio: folio to write + * @offset: starting offset within the folio for the write + * @count: number of bytes to read/write + * + * The page must be locked by the caller. This makes sure we never + * create two different requests for the same page. + * User should ensure it is safe to sleep in this function. + */ +struct nfs_page *nfs_page_create_from_folio(struct nfs_open_context *ctx, + struct folio *folio, + unsigned int offset, + unsigned int count) { struct nfs_lock_context *l_ctx = nfs_get_lock_context(ctx); struct nfs_page *ret; if (IS_ERR(l_ctx)) return ERR_CAST(l_ctx); - ret = __nfs_create_request(l_ctx, page, offset, offset, count); - if (!IS_ERR(ret)) + ret = nfs_page_create(l_ctx, offset, folio_index(folio), offset, count); + if (!IS_ERR(ret)) { + nfs_page_assign_folio(ret, folio); nfs_page_group_init(ret, NULL); + } nfs_put_lock_context(l_ctx); return ret; } @@ -500,10 +584,16 @@ nfs_create_subreq(struct nfs_page *req, { struct nfs_page *last; struct nfs_page *ret; + struct folio *folio = nfs_page_to_folio(req); + struct page *page = nfs_page_to_page(req, pgbase); - ret = __nfs_create_request(req->wb_lock_context, req->wb_page, - pgbase, offset, count); + ret = nfs_page_create(req->wb_lock_context, pgbase, req->wb_index, + offset, count); if (!IS_ERR(ret)) { + if (folio) + nfs_page_assign_folio(ret, folio); + else + nfs_page_assign_page(ret, page); /* find the last request */ for (last = req->wb_head; last->wb_this_page != req->wb_head; @@ -511,7 +601,6 @@ nfs_create_subreq(struct nfs_page *req, ; nfs_lock_request(ret); - ret->wb_index = req->wb_index; nfs_page_group_init(ret, last); ret->wb_nio = req->wb_nio; } @@ -550,11 +639,16 @@ void nfs_unlock_and_release_request(struct nfs_page *req) */ static void nfs_clear_request(struct nfs_page *req) { + struct folio *folio = nfs_page_to_folio(req); struct page *page = req->wb_page; struct nfs_lock_context *l_ctx = req->wb_lock_context; struct nfs_open_context *ctx; - if (page != NULL) { + if (folio != NULL) { + folio_put(folio); + req->wb_folio = NULL; + clear_bit(PG_FOLIO, &req->wb_flags); + } else if (page != NULL) { put_page(page); req->wb_page = NULL; } @@ -692,13 +786,14 @@ EXPORT_SYMBOL_GPL(nfs_pgio_header_free); /** * nfs_pgio_rpcsetup - Set up arguments for a pageio call * @hdr: The pageio hdr + * @pgbase: base * @count: Number of bytes to read * @how: How to commit data (writes only) * @cinfo: Commit information for the call (writes only) */ -static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr, - unsigned int count, - int how, struct nfs_commit_info *cinfo) +static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr, unsigned int pgbase, + unsigned int count, int how, + struct nfs_commit_info *cinfo) { struct nfs_page *req = hdr->req; @@ -709,7 +804,7 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr, hdr->args.offset = req_offset(req); /* pnfs_set_layoutcommit needs this */ hdr->mds_offset = hdr->args.offset; - hdr->args.pgbase = req->wb_pgbase; + hdr->args.pgbase = pgbase; hdr->args.pages = hdr->page_array.pagevec; hdr->args.count = count; hdr->args.context = get_nfs_open_context(nfs_req_openctx(req)); @@ -895,9 +990,10 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, struct nfs_commit_info cinfo; struct nfs_page_array *pg_array = &hdr->page_array; unsigned int pagecount, pageused; + unsigned int pg_base = offset_in_page(mirror->pg_base); gfp_t gfp_flags = nfs_io_gfp_mask(); - pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count); + pagecount = nfs_page_array_len(pg_base, mirror->pg_count); pg_array->npages = pagecount; if (pagecount <= ARRAY_SIZE(pg_array->page_array)) @@ -917,16 +1013,26 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, last_page = NULL; pageused = 0; while (!list_empty(head)) { + struct nfs_page_iter_page i; + struct page *page; + req = nfs_list_entry(head->next); nfs_list_move_request(req, &hdr->pages); - if (!last_page || last_page != req->wb_page) { - pageused++; - if (pageused > pagecount) - break; - *pages++ = last_page = req->wb_page; + if (req->wb_pgbase == 0) + last_page = NULL; + + nfs_page_iter_page_init(&i, req); + while ((page = nfs_page_iter_page_get(&i)) != NULL) { + if (last_page != page) { + pageused++; + if (pageused > pagecount) + goto full; + *pages++ = last_page = page; + } } } +full: if (WARN_ON_ONCE(pageused != pagecount)) { nfs_pgio_error(hdr); desc->pg_error = -EINVAL; @@ -938,7 +1044,8 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, desc->pg_ioflags &= ~FLUSH_COND_STABLE; /* Set up the argument struct */ - nfs_pgio_rpcsetup(hdr, mirror->pg_count, desc->pg_ioflags, &cinfo); + nfs_pgio_rpcsetup(hdr, pg_base, mirror->pg_count, desc->pg_ioflags, + &cinfo); desc->pg_rpc_callops = &nfs_pgio_common_ops; return 0; } @@ -1034,6 +1141,24 @@ static bool nfs_match_lock_context(const struct nfs_lock_context *l1, return l1->lockowner == l2->lockowner; } +static bool nfs_page_is_contiguous(const struct nfs_page *prev, + const struct nfs_page *req) +{ + size_t prev_end = prev->wb_pgbase + prev->wb_bytes; + + if (req_offset(req) != req_offset(prev) + prev->wb_bytes) + return false; + if (req->wb_pgbase == 0) + return prev_end == nfs_page_max_length(prev); + if (req->wb_pgbase == prev_end) { + struct folio *folio = nfs_page_to_folio(req); + if (folio) + return folio == nfs_page_to_folio(prev); + return req->wb_page == prev->wb_page; + } + return false; +} + /** * nfs_coalesce_size - test two requests for compatibility * @prev: pointer to nfs_page @@ -1062,16 +1187,8 @@ static unsigned int nfs_coalesce_size(struct nfs_page *prev, !nfs_match_lock_context(req->wb_lock_context, prev->wb_lock_context)) return 0; - if (req_offset(req) != req_offset(prev) + prev->wb_bytes) + if (!nfs_page_is_contiguous(prev, req)) return 0; - if (req->wb_page == prev->wb_page) { - if (req->wb_pgbase != prev->wb_pgbase + prev->wb_bytes) - return 0; - } else { - if (req->wb_pgbase != 0 || - prev->wb_pgbase + prev->wb_bytes != PAGE_SIZE) - return 0; - } } return pgio->pg_ops->pg_test(pgio, prev, req); } @@ -1411,16 +1528,21 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) { struct nfs_pgio_mirror *mirror; struct nfs_page *prev; + struct folio *folio; u32 midx; for (midx = 0; midx < desc->pg_mirror_count; midx++) { mirror = nfs_pgio_get_mirror(desc, midx); if (!list_empty(&mirror->pg_list)) { prev = nfs_list_entry(mirror->pg_list.prev); - if (index != prev->wb_index + 1) { - nfs_pageio_complete(desc); - break; - } + folio = nfs_page_to_folio(prev); + if (folio) { + if (index == folio_next_index(folio)) + continue; + } else if (index == prev->wb_index + 1) + continue; + nfs_pageio_complete(desc); + break; } } } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index a5db5158c634..306cba0b9e69 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -511,7 +511,7 @@ pnfs_layout_io_set_failed(struct pnfs_layout_hdr *lo, u32 iomode) spin_lock(&inode->i_lock); pnfs_layout_set_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); - pnfs_mark_matching_lsegs_invalid(lo, &head, &range, 0); + pnfs_mark_matching_lsegs_return(lo, &head, &range, 0); spin_unlock(&inode->i_lock); pnfs_free_lseg_list(&head); dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__, diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index e3e6a41f19de..d886c8226d8f 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -193,7 +193,7 @@ struct pnfs_commit_ops { void (*recover_commit_reqs) (struct list_head *list, struct nfs_commit_info *cinfo); struct nfs_page * (*search_commit_reqs)(struct nfs_commit_info *cinfo, - struct page *page); + struct folio *folio); }; struct pnfs_layout_hdr { @@ -395,7 +395,7 @@ void pnfs_generic_rw_release(void *data); void pnfs_generic_recover_commit_reqs(struct list_head *dst, struct nfs_commit_info *cinfo); struct nfs_page *pnfs_generic_search_commit_reqs(struct nfs_commit_info *cinfo, - struct page *page); + struct folio *folio); int pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, int how, @@ -557,13 +557,13 @@ pnfs_recover_commit_reqs(struct list_head *head, struct nfs_commit_info *cinfo) static inline struct nfs_page * pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo, - struct page *page) + struct folio *folio) { struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; if (!fl_cinfo->ops || !fl_cinfo->ops->search_commit_reqs) return NULL; - return fl_cinfo->ops->search_commit_reqs(cinfo, page); + return fl_cinfo->ops->search_commit_reqs(cinfo, folio); } /* Should the pNFS client commit and return the layout upon a setattr */ @@ -864,7 +864,7 @@ pnfs_recover_commit_reqs(struct list_head *head, struct nfs_commit_info *cinfo) static inline struct nfs_page * pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo, - struct page *page) + struct folio *folio) { return NULL; } diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 5d035dd2d7bf..a0112ad4937a 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -353,7 +353,7 @@ EXPORT_SYMBOL_GPL(pnfs_generic_recover_commit_reqs); static struct nfs_page * pnfs_bucket_search_commit_reqs(struct pnfs_commit_bucket *buckets, - unsigned int nbuckets, struct page *page) + unsigned int nbuckets, struct folio *folio) { struct nfs_page *req; struct pnfs_commit_bucket *b; @@ -363,11 +363,11 @@ pnfs_bucket_search_commit_reqs(struct pnfs_commit_bucket *buckets, * request is found */ for (i = 0, b = buckets; i < nbuckets; i++, b++) { list_for_each_entry(req, &b->written, wb_list) { - if (req->wb_page == page) + if (nfs_page_to_folio(req) == folio) return req->wb_head; } list_for_each_entry(req, &b->committing, wb_list) { - if (req->wb_page == page) + if (nfs_page_to_folio(req) == folio) return req->wb_head; } } @@ -375,14 +375,14 @@ pnfs_bucket_search_commit_reqs(struct pnfs_commit_bucket *buckets, } /* pnfs_generic_search_commit_reqs - Search lists in @cinfo for the head request - * for @page + * for @folio * @cinfo - commit info for current inode - * @page - page to search for matching head request + * @folio - page to search for matching head request * * Return: the head request if one is found, otherwise %NULL. */ -struct nfs_page * -pnfs_generic_search_commit_reqs(struct nfs_commit_info *cinfo, struct page *page) +struct nfs_page *pnfs_generic_search_commit_reqs(struct nfs_commit_info *cinfo, + struct folio *folio) { struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; struct pnfs_commit_array *array; @@ -390,7 +390,7 @@ pnfs_generic_search_commit_reqs(struct nfs_commit_info *cinfo, struct page *page list_for_each_entry(array, &fl_cinfo->commits, cinfo_list) { req = pnfs_bucket_search_commit_reqs(array->buckets, - array->nbuckets, page); + array->nbuckets, folio); if (req) return req; } @@ -1180,7 +1180,7 @@ pnfs_layout_mark_request_commit(struct nfs_page *req, nfs_request_add_commit_list_locked(req, list, cinfo); mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); - nfs_mark_page_unstable(req->wb_page, cinfo); + nfs_folio_mark_unstable(nfs_page_to_folio(req), cinfo); return; out_resched: mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 8ae2c8d1219d..c380cff4108e 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -49,12 +49,11 @@ static void nfs_readhdr_free(struct nfs_pgio_header *rhdr) kmem_cache_free(nfs_rdata_cachep, rhdr); } -static -int nfs_return_empty_page(struct page *page) +static int nfs_return_empty_folio(struct folio *folio) { - zero_user(page, 0, PAGE_SIZE); - SetPageUptodate(page); - unlock_page(page); + folio_zero_segment(folio, 0, folio_size(folio)); + folio_mark_uptodate(folio); + folio_unlock(folio); return 0; } @@ -111,18 +110,18 @@ EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); static void nfs_readpage_release(struct nfs_page *req, int error) { struct inode *inode = d_inode(nfs_req_openctx(req)->dentry); - struct page *page = req->wb_page; + struct folio *folio = nfs_page_to_folio(req); dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id, (unsigned long long)NFS_FILEID(inode), req->wb_bytes, (long long)req_offset(req)); if (nfs_error_is_fatal_on_server(error) && error != -ETIMEDOUT) - SetPageError(page); + folio_set_error(folio); if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) { - if (PageUptodate(page)) - nfs_fscache_write_page(inode, page); - unlock_page(page); + if (folio_test_uptodate(folio)) + nfs_fscache_write_page(inode, &folio->page); + folio_unlock(folio); } nfs_release_request(req); } @@ -135,7 +134,7 @@ struct nfs_readdesc { static void nfs_page_group_set_uptodate(struct nfs_page *req) { if (nfs_page_group_sync_on_bit(req, PG_UPTODATE)) - SetPageUptodate(req->wb_page); + folio_mark_uptodate(nfs_page_to_folio(req)); } static void nfs_read_completion(struct nfs_pgio_header *hdr) @@ -147,7 +146,7 @@ static void nfs_read_completion(struct nfs_pgio_header *hdr) goto out; while (!list_empty(&hdr->pages)) { struct nfs_page *req = nfs_list_entry(hdr->pages.next); - struct page *page = req->wb_page; + struct folio *folio = nfs_page_to_folio(req); unsigned long start = req->wb_pgbase; unsigned long end = req->wb_pgbase + req->wb_bytes; @@ -157,14 +156,14 @@ static void nfs_read_completion(struct nfs_pgio_header *hdr) if (bytes > hdr->good_bytes) { /* nothing in this request was good, so zero * the full extent of the request */ - zero_user_segment(page, start, end); + folio_zero_segment(folio, start, end); } else if (hdr->good_bytes - bytes < req->wb_bytes) { /* part of this request has good bytes, but * not all. zero the bad bytes */ start += hdr->good_bytes - bytes; WARN_ON(start < req->wb_pgbase); - zero_user_segment(page, start, end); + folio_zero_segment(folio, start, end); } } error = 0; @@ -281,33 +280,34 @@ static void nfs_readpage_result(struct rpc_task *task, nfs_readpage_retry(task, hdr); } -static int -readpage_async_filler(struct nfs_readdesc *desc, struct page *page) +static int readpage_async_filler(struct nfs_readdesc *desc, struct folio *folio) { - struct inode *inode = page_file_mapping(page)->host; - unsigned int rsize = NFS_SERVER(inode)->rsize; + struct inode *inode = folio_file_mapping(folio)->host; + struct nfs_server *server = NFS_SERVER(inode); + size_t fsize = folio_size(folio); + unsigned int rsize = server->rsize; struct nfs_page *new; unsigned int len, aligned_len; int error; - len = nfs_page_length(page); + len = nfs_folio_length(folio); if (len == 0) - return nfs_return_empty_page(page); + return nfs_return_empty_folio(folio); - aligned_len = min_t(unsigned int, ALIGN(len, rsize), PAGE_SIZE); + aligned_len = min_t(unsigned int, ALIGN(len, rsize), fsize); - if (!IS_SYNC(page->mapping->host)) { - error = nfs_fscache_read_page(page->mapping->host, page); + if (!IS_SYNC(inode)) { + error = nfs_fscache_read_page(inode, &folio->page); if (error == 0) goto out_unlock; } - new = nfs_create_request(desc->ctx, page, 0, aligned_len); + new = nfs_page_create_from_folio(desc->ctx, folio, 0, aligned_len); if (IS_ERR(new)) goto out_error; - if (len < PAGE_SIZE) - zero_user_segment(page, len, PAGE_SIZE); + if (len < fsize) + folio_zero_segment(folio, len, fsize); if (!nfs_pageio_add_request(&desc->pgio, new)) { nfs_list_remove_request(new); error = desc->pgio.pg_error; @@ -318,7 +318,7 @@ readpage_async_filler(struct nfs_readdesc *desc, struct page *page) out_error: error = PTR_ERR(new); out_unlock: - unlock_page(page); + folio_unlock(folio); out: return error; } @@ -331,61 +331,54 @@ out: */ int nfs_read_folio(struct file *file, struct folio *folio) { - struct page *page = &folio->page; struct nfs_readdesc desc; - struct inode *inode = page_file_mapping(page)->host; + struct inode *inode = file_inode(file); int ret; - trace_nfs_aop_readpage(inode, page); + trace_nfs_aop_readpage(inode, folio); nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); /* * Try to flush any pending writes to the file.. * - * NOTE! Because we own the page lock, there cannot + * NOTE! Because we own the folio lock, there cannot * be any new pending writes generated at this point - * for this page (other pages can be written to). + * for this folio (other folios can be written to). */ - ret = nfs_wb_page(inode, page); + ret = nfs_wb_folio(inode, folio); if (ret) goto out_unlock; - if (PageUptodate(page)) + if (folio_test_uptodate(folio)) goto out_unlock; ret = -ESTALE; if (NFS_STALE(inode)) goto out_unlock; - if (file == NULL) { - ret = -EBADF; - desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ); - if (desc.ctx == NULL) - goto out_unlock; - } else - desc.ctx = get_nfs_open_context(nfs_file_open_context(file)); + desc.ctx = get_nfs_open_context(nfs_file_open_context(file)); xchg(&desc.ctx->error, 0); nfs_pageio_init_read(&desc.pgio, inode, false, &nfs_async_read_completion_ops); - ret = readpage_async_filler(&desc, page); + ret = readpage_async_filler(&desc, folio); if (ret) goto out; nfs_pageio_complete_read(&desc.pgio); ret = desc.pgio.pg_error < 0 ? desc.pgio.pg_error : 0; if (!ret) { - ret = wait_on_page_locked_killable(page); - if (!PageUptodate(page) && !ret) + ret = folio_wait_locked_killable(folio); + if (!folio_test_uptodate(folio) && !ret) ret = xchg(&desc.ctx->error, 0); } out: put_nfs_open_context(desc.ctx); - trace_nfs_aop_readpage_done(inode, page, ret); + trace_nfs_aop_readpage_done(inode, folio, ret); return ret; out_unlock: - unlock_page(page); - trace_nfs_aop_readpage_done(inode, page, ret); + folio_unlock(folio); + trace_nfs_aop_readpage_done(inode, folio, ret); return ret; } @@ -395,7 +388,7 @@ void nfs_readahead(struct readahead_control *ractl) struct file *file = ractl->file; struct nfs_readdesc desc; struct inode *inode = ractl->mapping->host; - struct page *page; + struct folio *folio; int ret; trace_nfs_aop_readahead(inode, readahead_pos(ractl), nr_pages); @@ -416,9 +409,8 @@ void nfs_readahead(struct readahead_control *ractl) nfs_pageio_init_read(&desc.pgio, inode, false, &nfs_async_read_completion_ops); - while ((page = readahead_page(ractl)) != NULL) { - ret = readpage_async_filler(&desc, page); - put_page(page); + while ((folio = readahead_folio(ractl)) != NULL) { + ret = readpage_async_filler(&desc, folio); if (ret) break; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 80c240e50952..f4cca8f00c0c 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -25,6 +25,7 @@ #include <linux/freezer.h> #include <linux/wait.h> #include <linux/iversion.h> +#include <linux/filelock.h> #include <linux/uaccess.h> #include <linux/sched/mm.h> @@ -63,7 +64,7 @@ static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, struct inode *inode); static struct nfs_page * nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi, - struct page *page); + struct folio *folio); static struct kmem_cache *nfs_wdata_cachep; static mempool_t *nfs_wdata_mempool; @@ -170,31 +171,28 @@ nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode) return 0; } -static struct nfs_page * -nfs_page_private_request(struct page *page) +static struct nfs_page *nfs_folio_private_request(struct folio *folio) { - if (!PagePrivate(page)) - return NULL; - return (struct nfs_page *)page_private(page); + return folio_get_private(folio); } -/* - * nfs_page_find_head_request_locked - find head request associated with @page +/** + * nfs_folio_find_private_request - find head request associated with a folio + * @folio: pointer to folio * * must be called while holding the inode lock. * * returns matching head request with reference held, or NULL if not found. */ -static struct nfs_page * -nfs_page_find_private_request(struct page *page) +static struct nfs_page *nfs_folio_find_private_request(struct folio *folio) { - struct address_space *mapping = page_file_mapping(page); + struct address_space *mapping = folio_file_mapping(folio); struct nfs_page *req; - if (!PagePrivate(page)) + if (!folio_test_private(folio)) return NULL; spin_lock(&mapping->private_lock); - req = nfs_page_private_request(page); + req = nfs_folio_private_request(folio); if (req) { WARN_ON_ONCE(req->wb_head != req); kref_get(&req->wb_kref); @@ -203,18 +201,17 @@ nfs_page_find_private_request(struct page *page) return req; } -static struct nfs_page * -nfs_page_find_swap_request(struct page *page) +static struct nfs_page *nfs_folio_find_swap_request(struct folio *folio) { - struct inode *inode = page_file_mapping(page)->host; + struct inode *inode = folio_file_mapping(folio)->host; struct nfs_inode *nfsi = NFS_I(inode); struct nfs_page *req = NULL; - if (!PageSwapCache(page)) + if (!folio_test_swapcache(folio)) return NULL; mutex_lock(&nfsi->commit_mutex); - if (PageSwapCache(page)) { + if (folio_test_swapcache(folio)) { req = nfs_page_search_commits_for_head_request_locked(nfsi, - page); + folio); if (req) { WARN_ON_ONCE(req->wb_head != req); kref_get(&req->wb_kref); @@ -224,29 +221,30 @@ nfs_page_find_swap_request(struct page *page) return req; } -/* - * nfs_page_find_head_request - find head request associated with @page +/** + * nfs_folio_find_head_request - find head request associated with a folio + * @folio: pointer to folio * * returns matching head request with reference held, or NULL if not found. */ -static struct nfs_page *nfs_page_find_head_request(struct page *page) +static struct nfs_page *nfs_folio_find_head_request(struct folio *folio) { struct nfs_page *req; - req = nfs_page_find_private_request(page); + req = nfs_folio_find_private_request(folio); if (!req) - req = nfs_page_find_swap_request(page); + req = nfs_folio_find_swap_request(folio); return req; } -static struct nfs_page *nfs_find_and_lock_page_request(struct page *page) +static struct nfs_page *nfs_folio_find_and_lock_request(struct folio *folio) { - struct inode *inode = page_file_mapping(page)->host; + struct inode *inode = folio_file_mapping(folio)->host; struct nfs_page *req, *head; int ret; for (;;) { - req = nfs_page_find_head_request(page); + req = nfs_folio_find_head_request(folio); if (!req) return req; head = nfs_page_group_lock_head(req); @@ -260,9 +258,9 @@ static struct nfs_page *nfs_find_and_lock_page_request(struct page *page) return ERR_PTR(ret); } /* Ensure that nobody removed the request before we locked it */ - if (head == nfs_page_private_request(page)) + if (head == nfs_folio_private_request(folio)) break; - if (PageSwapCache(page)) + if (folio_test_swapcache(folio)) break; nfs_unlock_and_release_request(head); } @@ -270,18 +268,19 @@ static struct nfs_page *nfs_find_and_lock_page_request(struct page *page) } /* Adjust the file length if we're writing beyond the end */ -static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count) +static void nfs_grow_file(struct folio *folio, unsigned int offset, + unsigned int count) { - struct inode *inode = page_file_mapping(page)->host; + struct inode *inode = folio_file_mapping(folio)->host; loff_t end, i_size; pgoff_t end_index; spin_lock(&inode->i_lock); i_size = i_size_read(inode); - end_index = (i_size - 1) >> PAGE_SHIFT; - if (i_size > 0 && page_index(page) < end_index) + end_index = ((i_size - 1) >> folio_shift(folio)) << folio_order(folio); + if (i_size > 0 && folio_index(folio) < end_index) goto out; - end = page_file_offset(page) + ((loff_t)offset+count); + end = folio_file_pos(folio) + (loff_t)offset + (loff_t)count; if (i_size >= end) goto out; trace_nfs_size_grow(inode, end); @@ -307,11 +306,11 @@ static void nfs_set_pageerror(struct address_space *mapping) spin_unlock(&inode->i_lock); } -static void nfs_mapping_set_error(struct page *page, int error) +static void nfs_mapping_set_error(struct folio *folio, int error) { - struct address_space *mapping = page_file_mapping(page); + struct address_space *mapping = folio_file_mapping(folio); - SetPageError(page); + folio_set_error(folio); filemap_set_wb_err(mapping, error); if (mapping->host) errseq_set(&mapping->host->i_sb->s_wb_err, @@ -358,9 +357,9 @@ nfs_page_group_search_locked(struct nfs_page *head, unsigned int page_offset) */ static bool nfs_page_group_covers_page(struct nfs_page *req) { + unsigned int len = nfs_folio_length(nfs_page_to_folio(req)); struct nfs_page *tmp; unsigned int pos = 0; - unsigned int len = nfs_page_length(req->wb_page); nfs_page_group_lock(req); @@ -380,11 +379,13 @@ static bool nfs_page_group_covers_page(struct nfs_page *req) */ static void nfs_mark_uptodate(struct nfs_page *req) { - if (PageUptodate(req->wb_page)) + struct folio *folio = nfs_page_to_folio(req); + + if (folio_test_uptodate(folio)) return; if (!nfs_page_group_covers_page(req)) return; - SetPageUptodate(req->wb_page); + folio_mark_uptodate(folio); } static int wb_priority(struct writeback_control *wbc) @@ -406,35 +407,34 @@ int nfs_congestion_kb; #define NFS_CONGESTION_OFF_THRESH \ (NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2)) -static void nfs_set_page_writeback(struct page *page) +static void nfs_folio_set_writeback(struct folio *folio) { - struct inode *inode = page_file_mapping(page)->host; - struct nfs_server *nfss = NFS_SERVER(inode); - int ret = test_set_page_writeback(page); - - WARN_ON_ONCE(ret != 0); + struct nfs_server *nfss = NFS_SERVER(folio_file_mapping(folio)->host); - if (atomic_long_inc_return(&nfss->writeback) > - NFS_CONGESTION_ON_THRESH) + folio_start_writeback(folio); + if (atomic_long_inc_return(&nfss->writeback) > NFS_CONGESTION_ON_THRESH) nfss->write_congested = 1; } -static void nfs_end_page_writeback(struct nfs_page *req) +static void nfs_folio_end_writeback(struct folio *folio) { - struct inode *inode = page_file_mapping(req->wb_page)->host; - struct nfs_server *nfss = NFS_SERVER(inode); - bool is_done; + struct nfs_server *nfss = NFS_SERVER(folio_file_mapping(folio)->host); - is_done = nfs_page_group_sync_on_bit(req, PG_WB_END); - nfs_unlock_request(req); - if (!is_done) - return; - - end_page_writeback(req->wb_page); - if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) + folio_end_writeback(folio); + if (atomic_long_dec_return(&nfss->writeback) < + NFS_CONGESTION_OFF_THRESH) nfss->write_congested = 0; } +static void nfs_page_end_writeback(struct nfs_page *req) +{ + if (nfs_page_group_sync_on_bit(req, PG_WB_END)) { + nfs_unlock_request(req); + nfs_folio_end_writeback(nfs_page_to_folio(req)); + } else + nfs_unlock_request(req); +} + /* * nfs_destroy_unlinked_subrequests - destroy recently unlinked subrequests * @@ -549,7 +549,7 @@ nfs_join_page_group(struct nfs_page *head, struct inode *inode) /* * nfs_lock_and_join_requests - join all subreqs to the head req - * @page: the page used to lookup the "page group" of nfs_page structures + * @folio: the folio used to lookup the "page group" of nfs_page structures * * This function joins all sub requests to the head request by first * locking all requests in the group, cancelling any pending operations @@ -559,13 +559,12 @@ nfs_join_page_group(struct nfs_page *head, struct inode *inode) * * Returns a locked, referenced pointer to the head request - which after * this call is guaranteed to be the only request associated with the page. - * Returns NULL if no requests are found for @page, or a ERR_PTR if an + * Returns NULL if no requests are found for @folio, or a ERR_PTR if an * error was encountered. */ -static struct nfs_page * -nfs_lock_and_join_requests(struct page *page) +static struct nfs_page *nfs_lock_and_join_requests(struct folio *folio) { - struct inode *inode = page_file_mapping(page)->host; + struct inode *inode = folio_file_mapping(folio)->host; struct nfs_page *head; int ret; @@ -574,7 +573,7 @@ nfs_lock_and_join_requests(struct page *page) * reference to the whole page group - the group will not be destroyed * until the head reference is released. */ - head = nfs_find_and_lock_page_request(page); + head = nfs_folio_find_and_lock_request(folio); if (IS_ERR_OR_NULL(head)) return head; @@ -592,11 +591,10 @@ nfs_lock_and_join_requests(struct page *page) static void nfs_write_error(struct nfs_page *req, int error) { - trace_nfs_write_error(page_file_mapping(req->wb_page)->host, req, - error); - nfs_mapping_set_error(req->wb_page, error); + trace_nfs_write_error(nfs_page_to_inode(req), req, error); + nfs_mapping_set_error(nfs_page_to_folio(req), error); nfs_inode_remove_request(req); - nfs_end_page_writeback(req); + nfs_page_end_writeback(req); nfs_release_request(req); } @@ -604,21 +602,21 @@ static void nfs_write_error(struct nfs_page *req, int error) * Find an associated nfs write request, and prepare to flush it out * May return an error if the user signalled nfs_wait_on_request(). */ -static int nfs_page_async_flush(struct page *page, +static int nfs_page_async_flush(struct folio *folio, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) { struct nfs_page *req; int ret = 0; - req = nfs_lock_and_join_requests(page); + req = nfs_lock_and_join_requests(folio); if (!req) goto out; ret = PTR_ERR(req); if (IS_ERR(req)) goto out; - nfs_set_page_writeback(page); + nfs_folio_set_writeback(folio); WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags)); /* If there is a fatal error that covers this write, just exit */ @@ -636,12 +634,12 @@ static int nfs_page_async_flush(struct page *page, goto out_launder; if (wbc->sync_mode == WB_SYNC_NONE) ret = AOP_WRITEPAGE_ACTIVATE; - redirty_page_for_writepage(wbc, page); + folio_redirty_for_writepage(wbc, folio); nfs_redirty_request(req); pgio->pg_error = 0; } else - nfs_add_stats(page_file_mapping(page)->host, - NFSIOS_WRITEPAGES, 1); + nfs_add_stats(folio_file_mapping(folio)->host, + NFSIOS_WRITEPAGES, 1); out: return ret; out_launder: @@ -649,21 +647,21 @@ out_launder: return 0; } -static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, +static int nfs_do_writepage(struct folio *folio, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) { - nfs_pageio_cond_complete(pgio, page_index(page)); - return nfs_page_async_flush(page, wbc, pgio); + nfs_pageio_cond_complete(pgio, folio_index(folio)); + return nfs_page_async_flush(folio, wbc, pgio); } /* * Write an mmapped page to the server. */ -static int nfs_writepage_locked(struct page *page, +static int nfs_writepage_locked(struct folio *folio, struct writeback_control *wbc) { struct nfs_pageio_descriptor pgio; - struct inode *inode = page_file_mapping(page)->host; + struct inode *inode = folio_file_mapping(folio)->host; int err; if (wbc->sync_mode == WB_SYNC_NONE && @@ -671,9 +669,9 @@ static int nfs_writepage_locked(struct page *page, return AOP_WRITEPAGE_ACTIVATE; nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); - nfs_pageio_init_write(&pgio, inode, 0, - false, &nfs_async_write_completion_ops); - err = nfs_do_writepage(page, wbc, &pgio); + nfs_pageio_init_write(&pgio, inode, 0, false, + &nfs_async_write_completion_ops); + err = nfs_do_writepage(folio, wbc, &pgio); pgio.pg_error = 0; nfs_pageio_complete(&pgio); return err; @@ -681,21 +679,23 @@ static int nfs_writepage_locked(struct page *page, int nfs_writepage(struct page *page, struct writeback_control *wbc) { + struct folio *folio = page_folio(page); int ret; - ret = nfs_writepage_locked(page, wbc); + ret = nfs_writepage_locked(folio, wbc); if (ret != AOP_WRITEPAGE_ACTIVATE) unlock_page(page); return ret; } -static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data) +static int nfs_writepages_callback(struct folio *folio, + struct writeback_control *wbc, void *data) { int ret; - ret = nfs_do_writepage(page, wbc, data); + ret = nfs_do_writepage(folio, wbc, data); if (ret != AOP_WRITEPAGE_ACTIVATE) - unlock_page(page); + folio_unlock(folio); return ret; } @@ -749,10 +749,11 @@ out_err: /* * Insert a write request into an inode */ -static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) +static void nfs_inode_add_request(struct nfs_page *req) { - struct address_space *mapping = page_file_mapping(req->wb_page); - struct nfs_inode *nfsi = NFS_I(inode); + struct folio *folio = nfs_page_to_folio(req); + struct address_space *mapping = folio_file_mapping(folio); + struct nfs_inode *nfsi = NFS_I(mapping->host); WARN_ON_ONCE(req->wb_this_page != req); @@ -764,10 +765,10 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) * with invalidate/truncate. */ spin_lock(&mapping->private_lock); - if (likely(!PageSwapCache(req->wb_page))) { + if (likely(!folio_test_swapcache(folio))) { set_bit(PG_MAPPED, &req->wb_flags); - SetPagePrivate(req->wb_page); - set_page_private(req->wb_page, (unsigned long)req); + folio_set_private(folio); + folio->private = req; } spin_unlock(&mapping->private_lock); atomic_long_inc(&nfsi->nrequests); @@ -784,47 +785,43 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) */ static void nfs_inode_remove_request(struct nfs_page *req) { - struct address_space *mapping = page_file_mapping(req->wb_page); - struct inode *inode = mapping->host; - struct nfs_inode *nfsi = NFS_I(inode); - struct nfs_page *head; - if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) { - head = req->wb_head; + struct folio *folio = nfs_page_to_folio(req->wb_head); + struct address_space *mapping = folio_file_mapping(folio); spin_lock(&mapping->private_lock); - if (likely(head->wb_page && !PageSwapCache(head->wb_page))) { - set_page_private(head->wb_page, 0); - ClearPagePrivate(head->wb_page); - clear_bit(PG_MAPPED, &head->wb_flags); + if (likely(folio && !folio_test_swapcache(folio))) { + folio->private = NULL; + folio_clear_private(folio); + clear_bit(PG_MAPPED, &req->wb_head->wb_flags); } spin_unlock(&mapping->private_lock); } if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) { nfs_release_request(req); - atomic_long_dec(&nfsi->nrequests); + atomic_long_dec(&NFS_I(nfs_page_to_inode(req))->nrequests); } } -static void -nfs_mark_request_dirty(struct nfs_page *req) +static void nfs_mark_request_dirty(struct nfs_page *req) { - if (req->wb_page) - __set_page_dirty_nobuffers(req->wb_page); + struct folio *folio = nfs_page_to_folio(req); + if (folio) + filemap_dirty_folio(folio_mapping(folio), folio); } /* * nfs_page_search_commits_for_head_request_locked * - * Search through commit lists on @inode for the head request for @page. + * Search through commit lists on @inode for the head request for @folio. * Must be called while holding the inode (which is cinfo) lock. * * Returns the head request if found, or NULL if not found. */ static struct nfs_page * nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi, - struct page *page) + struct folio *folio) { struct nfs_page *freq, *t; struct nfs_commit_info cinfo; @@ -833,13 +830,13 @@ nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi, nfs_init_cinfo_from_inode(&cinfo, inode); /* search through pnfs commit lists */ - freq = pnfs_search_commit_reqs(inode, &cinfo, page); + freq = pnfs_search_commit_reqs(inode, &cinfo, folio); if (freq) return freq->wb_head; /* Linearly search the commit list for the correct request */ list_for_each_entry_safe(freq, t, &cinfo.mds->list, wb_list) { - if (freq->wb_page == page) + if (nfs_page_to_folio(freq) == folio) return freq->wb_head; } @@ -887,8 +884,7 @@ nfs_request_add_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo) mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); nfs_request_add_commit_list_locked(req, &cinfo->mds->list, cinfo); mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); - if (req->wb_page) - nfs_mark_page_unstable(req->wb_page, cinfo); + nfs_folio_mark_unstable(nfs_page_to_folio(req), cinfo); } EXPORT_SYMBOL_GPL(nfs_request_add_commit_list); @@ -947,12 +943,15 @@ nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, nfs_request_add_commit_list(req, cinfo); } -static void -nfs_clear_page_commit(struct page *page) +static void nfs_folio_clear_commit(struct folio *folio) { - dec_node_page_state(page, NR_WRITEBACK); - dec_wb_stat(&inode_to_bdi(page_file_mapping(page)->host)->wb, - WB_WRITEBACK); + if (folio) { + long nr = folio_nr_pages(folio); + + node_stat_mod_folio(folio, NR_WRITEBACK, -nr); + wb_stat_mod(&inode_to_bdi(folio_file_mapping(folio)->host)->wb, + WB_WRITEBACK, -nr); + } } /* Called holding the request lock on @req */ @@ -970,7 +969,7 @@ nfs_clear_request_commit(struct nfs_page *req) nfs_request_remove_commit_list(req, &cinfo); } mutex_unlock(&NFS_I(inode)->commit_mutex); - nfs_clear_page_commit(req->wb_page); + nfs_folio_clear_commit(nfs_page_to_folio(req)); } } @@ -1002,7 +1001,8 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes < bytes)) { trace_nfs_comp_error(hdr->inode, req, hdr->error); - nfs_mapping_set_error(req->wb_page, hdr->error); + nfs_mapping_set_error(nfs_page_to_folio(req), + hdr->error); goto remove_req; } if (nfs_write_need_commit(hdr)) { @@ -1016,7 +1016,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) remove_req: nfs_inode_remove_request(req); next: - nfs_end_page_writeback(req); + nfs_page_end_writeback(req); nfs_release_request(req); } out: @@ -1092,10 +1092,9 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, * If the attempt fails, then the existing request is flushed out * to disk. */ -static struct nfs_page *nfs_try_to_update_request(struct inode *inode, - struct page *page, - unsigned int offset, - unsigned int bytes) +static struct nfs_page *nfs_try_to_update_request(struct folio *folio, + unsigned int offset, + unsigned int bytes) { struct nfs_page *req; unsigned int rqend; @@ -1104,7 +1103,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, end = offset + bytes; - req = nfs_lock_and_join_requests(page); + req = nfs_lock_and_join_requests(folio); if (IS_ERR_OR_NULL(req)) return req; @@ -1137,7 +1136,7 @@ out_flushme: */ nfs_mark_request_dirty(req); nfs_unlock_and_release_request(req); - error = nfs_wb_page(inode, page); + error = nfs_wb_folio(folio_file_mapping(folio)->host, folio); return (error < 0) ? ERR_PTR(error) : NULL; } @@ -1148,40 +1147,42 @@ out_flushme: * if we have to add a new request. Also assumes that the caller has * already called nfs_flush_incompatible() if necessary. */ -static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx, - struct page *page, unsigned int offset, unsigned int bytes) +static struct nfs_page *nfs_setup_write_request(struct nfs_open_context *ctx, + struct folio *folio, + unsigned int offset, + unsigned int bytes) { - struct inode *inode = page_file_mapping(page)->host; - struct nfs_page *req; + struct nfs_page *req; - req = nfs_try_to_update_request(inode, page, offset, bytes); + req = nfs_try_to_update_request(folio, offset, bytes); if (req != NULL) goto out; - req = nfs_create_request(ctx, page, offset, bytes); + req = nfs_page_create_from_folio(ctx, folio, offset, bytes); if (IS_ERR(req)) goto out; - nfs_inode_add_request(inode, req); + nfs_inode_add_request(req); out: return req; } -static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, - unsigned int offset, unsigned int count) +static int nfs_writepage_setup(struct nfs_open_context *ctx, + struct folio *folio, unsigned int offset, + unsigned int count) { - struct nfs_page *req; + struct nfs_page *req; - req = nfs_setup_write_request(ctx, page, offset, count); + req = nfs_setup_write_request(ctx, folio, offset, count); if (IS_ERR(req)) return PTR_ERR(req); /* Update file length */ - nfs_grow_file(page, offset, count); + nfs_grow_file(folio, offset, count); nfs_mark_uptodate(req); nfs_mark_request_dirty(req); nfs_unlock_and_release_request(req); return 0; } -int nfs_flush_incompatible(struct file *file, struct page *page) +int nfs_flush_incompatible(struct file *file, struct folio *folio) { struct nfs_open_context *ctx = nfs_file_open_context(file); struct nfs_lock_context *l_ctx; @@ -1197,12 +1198,12 @@ int nfs_flush_incompatible(struct file *file, struct page *page) * dropped page. */ do { - req = nfs_page_find_head_request(page); + req = nfs_folio_find_head_request(folio); if (req == NULL) return 0; l_ctx = req->wb_lock_context; - do_flush = req->wb_page != page || - !nfs_match_open_context(nfs_req_openctx(req), ctx); + do_flush = nfs_page_to_folio(req) != folio || + !nfs_match_open_context(nfs_req_openctx(req), ctx); if (l_ctx && flctx && !(list_empty_careful(&flctx->flc_posix) && list_empty_careful(&flctx->flc_flock))) { @@ -1211,7 +1212,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page) nfs_release_request(req); if (!do_flush) return 0; - status = nfs_wb_page(page_file_mapping(page)->host, page); + status = nfs_wb_folio(folio_file_mapping(folio)->host, folio); } while (status == 0); return status; } @@ -1283,9 +1284,9 @@ out: * the PageUptodate() flag. In this case, we will need to turn off * write optimisations that depend on the page contents being correct. */ -static bool nfs_write_pageuptodate(struct page *page, struct inode *inode, - unsigned int pagelen) +static bool nfs_folio_write_uptodate(struct folio *folio, unsigned int pagelen) { + struct inode *inode = folio_file_mapping(folio)->host; struct nfs_inode *nfsi = NFS_I(inode); if (nfs_have_delegated_attributes(inode)) @@ -1299,7 +1300,7 @@ static bool nfs_write_pageuptodate(struct page *page, struct inode *inode, out: if (nfsi->cache_validity & NFS_INO_INVALID_DATA && pagelen != 0) return false; - return PageUptodate(page) != 0; + return folio_test_uptodate(folio) != 0; } static bool @@ -1317,16 +1318,17 @@ is_whole_file_wrlock(struct file_lock *fl) * If the file is opened for synchronous writes then we can just skip the rest * of the checks. */ -static int nfs_can_extend_write(struct file *file, struct page *page, - struct inode *inode, unsigned int pagelen) +static int nfs_can_extend_write(struct file *file, struct folio *folio, + unsigned int pagelen) { - int ret; + struct inode *inode = file_inode(file); struct file_lock_context *flctx = locks_inode_context(inode); struct file_lock *fl; + int ret; if (file->f_flags & O_DSYNC) return 0; - if (!nfs_write_pageuptodate(page, inode, pagelen)) + if (!nfs_folio_write_uptodate(folio, pagelen)) return 0; if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) return 1; @@ -1358,33 +1360,33 @@ static int nfs_can_extend_write(struct file *file, struct page *page, * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad * things with a page scheduled for an RPC call (e.g. invalidate it). */ -int nfs_updatepage(struct file *file, struct page *page, - unsigned int offset, unsigned int count) +int nfs_update_folio(struct file *file, struct folio *folio, + unsigned int offset, unsigned int count) { struct nfs_open_context *ctx = nfs_file_open_context(file); - struct address_space *mapping = page_file_mapping(page); - struct inode *inode = mapping->host; - unsigned int pagelen = nfs_page_length(page); + struct address_space *mapping = folio_file_mapping(folio); + struct inode *inode = mapping->host; + unsigned int pagelen = nfs_folio_length(folio); int status = 0; nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE); - dprintk("NFS: nfs_updatepage(%pD2 %d@%lld)\n", - file, count, (long long)(page_file_offset(page) + offset)); + dprintk("NFS: nfs_update_folio(%pD2 %d@%lld)\n", file, count, + (long long)(folio_file_pos(folio) + offset)); if (!count) goto out; - if (nfs_can_extend_write(file, page, inode, pagelen)) { + if (nfs_can_extend_write(file, folio, pagelen)) { count = max(count + offset, pagelen); offset = 0; } - status = nfs_writepage_setup(ctx, page, offset, count); + status = nfs_writepage_setup(ctx, folio, offset, count); if (status < 0) nfs_set_pageerror(mapping); out: - dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n", + dprintk("NFS: nfs_update_folio returns %d (isize %lld)\n", status, (long long)i_size_read(inode)); return status; } @@ -1420,13 +1422,13 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr, */ static void nfs_redirty_request(struct nfs_page *req) { - struct nfs_inode *nfsi = NFS_I(page_file_mapping(req->wb_page)->host); + struct nfs_inode *nfsi = NFS_I(nfs_page_to_inode(req)); /* Bump the transmission count */ req->wb_nio++; nfs_mark_request_dirty(req); atomic_long_inc(&nfsi->redirtied_pages); - nfs_end_page_writeback(req); + nfs_page_end_writeback(req); nfs_release_request(req); } @@ -1784,18 +1786,18 @@ void nfs_retry_commit(struct list_head *page_list, req = nfs_list_entry(page_list->next); nfs_list_remove_request(req); nfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx); - if (!cinfo->dreq) - nfs_clear_page_commit(req->wb_page); + nfs_folio_clear_commit(nfs_page_to_folio(req)); nfs_unlock_and_release_request(req); } } EXPORT_SYMBOL_GPL(nfs_retry_commit); -static void -nfs_commit_resched_write(struct nfs_commit_info *cinfo, - struct nfs_page *req) +static void nfs_commit_resched_write(struct nfs_commit_info *cinfo, + struct nfs_page *req) { - __set_page_dirty_nobuffers(req->wb_page); + struct folio *folio = nfs_page_to_folio(req); + + filemap_dirty_folio(folio_mapping(folio), folio); } /* @@ -1846,12 +1848,13 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) int status = data->task.tk_status; struct nfs_commit_info cinfo; struct nfs_server *nfss; + struct folio *folio; while (!list_empty(&data->pages)) { req = nfs_list_entry(data->pages.next); nfs_list_remove_request(req); - if (req->wb_page) - nfs_clear_page_commit(req->wb_page); + folio = nfs_page_to_folio(req); + nfs_folio_clear_commit(folio); dprintk("NFS: commit (%s/%llu %d@%lld)", nfs_req_openctx(req)->dentry->d_sb->s_id, @@ -1859,10 +1862,10 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) req->wb_bytes, (long long)req_offset(req)); if (status < 0) { - if (req->wb_page) { + if (folio) { trace_nfs_commit_error(data->inode, req, status); - nfs_mapping_set_error(req->wb_page, status); + nfs_mapping_set_error(folio, status); nfs_inode_remove_request(req); } dprintk_cont(", error = %d\n", status); @@ -1873,7 +1876,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) * returned by the server against all stored verfs. */ if (nfs_write_match_verf(verf, req)) { /* We have a match */ - if (req->wb_page) + if (folio) nfs_inode_remove_request(req); dprintk_cont(" OK\n"); goto next; @@ -2054,7 +2057,7 @@ int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio) /* blocking call to cancel all requests and join to a single (head) * request */ - req = nfs_lock_and_join_requests(&folio->page); + req = nfs_lock_and_join_requests(folio); if (IS_ERR(req)) { ret = PTR_ERR(req); @@ -2070,13 +2073,18 @@ int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio) return ret; } -/* - * Write back all requests on one page - we do this before reading it. +/** + * nfs_wb_folio - Write back all requests on one page + * @inode: pointer to page + * @folio: pointer to folio + * + * Assumes that the folio has been locked by the caller, and will + * not unlock it. */ -int nfs_wb_page(struct inode *inode, struct page *page) +int nfs_wb_folio(struct inode *inode, struct folio *folio) { - loff_t range_start = page_file_offset(page); - loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1); + loff_t range_start = folio_file_pos(folio); + loff_t range_end = range_start + (loff_t)folio_size(folio) - 1; struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, .nr_to_write = 0, @@ -2085,25 +2093,25 @@ int nfs_wb_page(struct inode *inode, struct page *page) }; int ret; - trace_nfs_writeback_page_enter(inode); + trace_nfs_writeback_folio(inode, folio); for (;;) { - wait_on_page_writeback(page); - if (clear_page_dirty_for_io(page)) { - ret = nfs_writepage_locked(page, &wbc); + folio_wait_writeback(folio); + if (folio_clear_dirty_for_io(folio)) { + ret = nfs_writepage_locked(folio, &wbc); if (ret < 0) goto out_error; continue; } ret = 0; - if (!PagePrivate(page)) + if (!folio_test_private(folio)) break; ret = nfs_commit_inode(inode, FLUSH_SYNC); if (ret < 0) goto out_error; } out_error: - trace_nfs_writeback_page_exit(inode, ret); + trace_nfs_writeback_folio_done(inode, folio, ret); return ret; } diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c index 0a9b72685f98..1479583fbb62 100644 --- a/fs/nfs_common/grace.c +++ b/fs/nfs_common/grace.c @@ -9,6 +9,7 @@ #include <net/net_namespace.h> #include <net/netns/generic.h> #include <linux/fs.h> +#include <linux/filelock.h> static unsigned int grace_net_id; static DEFINE_SPINLOCK(grace_lock); diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c deleted file mode 100644 index 76bee0a0d308..000000000000 --- a/fs/nfsd/fault_inject.c +++ /dev/null @@ -1,142 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (c) 2011 Bryan Schumaker <bjschuma@netapp.com> - * - * Uses debugfs to create fault injection points for client testing - */ - -#include <linux/types.h> -#include <linux/fs.h> -#include <linux/debugfs.h> -#include <linux/module.h> -#include <linux/nsproxy.h> -#include <linux/sunrpc/addr.h> -#include <linux/uaccess.h> -#include <linux/kernel.h> - -#include "state.h" -#include "netns.h" - -struct nfsd_fault_inject_op { - char *file; - u64 (*get)(void); - u64 (*set_val)(u64); - u64 (*set_clnt)(struct sockaddr_storage *, size_t); -}; - -static struct dentry *debug_dir; - -static ssize_t fault_inject_read(struct file *file, char __user *buf, - size_t len, loff_t *ppos) -{ - static u64 val; - char read_buf[25]; - size_t size; - loff_t pos = *ppos; - struct nfsd_fault_inject_op *op = file_inode(file)->i_private; - - if (!pos) - val = op->get(); - size = scnprintf(read_buf, sizeof(read_buf), "%llu\n", val); - - return simple_read_from_buffer(buf, len, ppos, read_buf, size); -} - -static ssize_t fault_inject_write(struct file *file, const char __user *buf, - size_t len, loff_t *ppos) -{ - char write_buf[INET6_ADDRSTRLEN]; - size_t size = min(sizeof(write_buf) - 1, len); - struct net *net = current->nsproxy->net_ns; - struct sockaddr_storage sa; - struct nfsd_fault_inject_op *op = file_inode(file)->i_private; - u64 val; - char *nl; - - if (copy_from_user(write_buf, buf, size)) - return -EFAULT; - write_buf[size] = '\0'; - - /* Deal with any embedded newlines in the string */ - nl = strchr(write_buf, '\n'); - if (nl) { - size = nl - write_buf; - *nl = '\0'; - } - - size = rpc_pton(net, write_buf, size, (struct sockaddr *)&sa, sizeof(sa)); - if (size > 0) { - val = op->set_clnt(&sa, size); - if (val) - pr_info("NFSD [%s]: Client %s had %llu state object(s)\n", - op->file, write_buf, val); - } else { - val = simple_strtoll(write_buf, NULL, 0); - if (val == 0) - pr_info("NFSD Fault Injection: %s (all)", op->file); - else - pr_info("NFSD Fault Injection: %s (n = %llu)", - op->file, val); - val = op->set_val(val); - pr_info("NFSD: %s: found %llu", op->file, val); - } - return len; /* on success, claim we got the whole input */ -} - -static const struct file_operations fops_nfsd = { - .owner = THIS_MODULE, - .read = fault_inject_read, - .write = fault_inject_write, -}; - -void nfsd_fault_inject_cleanup(void) -{ - debugfs_remove_recursive(debug_dir); -} - -static struct nfsd_fault_inject_op inject_ops[] = { - { - .file = "forget_clients", - .get = nfsd_inject_print_clients, - .set_val = nfsd_inject_forget_clients, - .set_clnt = nfsd_inject_forget_client, - }, - { - .file = "forget_locks", - .get = nfsd_inject_print_locks, - .set_val = nfsd_inject_forget_locks, - .set_clnt = nfsd_inject_forget_client_locks, - }, - { - .file = "forget_openowners", - .get = nfsd_inject_print_openowners, - .set_val = nfsd_inject_forget_openowners, - .set_clnt = nfsd_inject_forget_client_openowners, - }, - { - .file = "forget_delegations", - .get = nfsd_inject_print_delegations, - .set_val = nfsd_inject_forget_delegations, - .set_clnt = nfsd_inject_forget_client_delegations, - }, - { - .file = "recall_delegations", - .get = nfsd_inject_print_delegations, - .set_val = nfsd_inject_recall_delegations, - .set_clnt = nfsd_inject_recall_client_delegations, - }, -}; - -void nfsd_fault_inject_init(void) -{ - unsigned int i; - struct nfsd_fault_inject_op *op; - umode_t mode = S_IFREG | S_IRUSR | S_IWUSR; - - debug_dir = debugfs_create_dir("nfsd", NULL); - - for (i = 0; i < ARRAY_SIZE(inject_ops); i++) { - op = &inject_ops[i]; - debugfs_create_file(op->file, mode, debug_dir, op, &fops_nfsd); - } -} diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 45b2c9e3f636..6e8712bd7c99 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -331,37 +331,27 @@ nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may) return nf; } +/** + * nfsd_file_check_write_error - check for writeback errors on a file + * @nf: nfsd_file to check for writeback errors + * + * Check whether a nfsd_file has an unseen error. Reset the write + * verifier if so. + */ static void -nfsd_file_fsync(struct nfsd_file *nf) -{ - struct file *file = nf->nf_file; - int ret; - - if (!file || !(file->f_mode & FMODE_WRITE)) - return; - ret = vfs_fsync(file, 1); - trace_nfsd_file_fsync(nf, ret); - if (ret) - nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); -} - -static int nfsd_file_check_write_error(struct nfsd_file *nf) { struct file *file = nf->nf_file; - if (!file || !(file->f_mode & FMODE_WRITE)) - return 0; - return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err)); + if ((file->f_mode & FMODE_WRITE) && + filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err))) + nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); } static void nfsd_file_hash_remove(struct nfsd_file *nf) { trace_nfsd_file_unhash(nf); - - if (nfsd_file_check_write_error(nf)) - nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); rhashtable_remove_fast(&nfsd_file_rhash_tbl, &nf->nf_rhash, nfsd_file_rhash_params); } @@ -387,23 +377,12 @@ nfsd_file_free(struct nfsd_file *nf) this_cpu_add(nfsd_file_total_age, age); nfsd_file_unhash(nf); - - /* - * We call fsync here in order to catch writeback errors. It's not - * strictly required by the protocol, but an nfsd_file could get - * evicted from the cache before a COMMIT comes in. If another - * task were to open that file in the interim and scrape the error, - * then the client may never see it. By calling fsync here, we ensure - * that writeback happens before the entry is freed, and that any - * errors reported result in the write verifier changing. - */ - nfsd_file_fsync(nf); - if (nf->nf_mark) nfsd_file_mark_put(nf->nf_mark); if (nf->nf_file) { get_file(nf->nf_file); filp_close(nf->nf_file, NULL); + nfsd_file_check_write_error(nf); fput(nf->nf_file); } @@ -452,7 +431,7 @@ static bool nfsd_file_lru_remove(struct nfsd_file *nf) struct nfsd_file * nfsd_file_get(struct nfsd_file *nf) { - if (likely(refcount_inc_not_zero(&nf->nf_ref))) + if (nf && refcount_inc_not_zero(&nf->nf_ref)) return nf; return NULL; } @@ -662,6 +641,39 @@ static struct shrinker nfsd_file_shrinker = { }; /** + * nfsd_file_cond_queue - conditionally unhash and queue a nfsd_file + * @nf: nfsd_file to attempt to queue + * @dispose: private list to queue successfully-put objects + * + * Unhash an nfsd_file, try to get a reference to it, and then put that + * reference. If it's the last reference, queue it to the dispose list. + */ +static void +nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose) + __must_hold(RCU) +{ + int decrement = 1; + + /* If we raced with someone else unhashing, ignore it */ + if (!nfsd_file_unhash(nf)) + return; + + /* If we can't get a reference, ignore it */ + if (!nfsd_file_get(nf)) + return; + + /* Extra decrement if we remove from the LRU */ + if (nfsd_file_lru_remove(nf)) + ++decrement; + + /* If refcount goes to 0, then put on the dispose list */ + if (refcount_sub_and_test(decrement, &nf->nf_ref)) { + list_add(&nf->nf_lru, dispose); + trace_nfsd_file_closing(nf); + } +} + +/** * nfsd_file_queue_for_close: try to close out any open nfsd_files for an inode * @inode: inode on which to close out nfsd_files * @dispose: list on which to gather nfsd_files to close out @@ -688,30 +700,11 @@ nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose) rcu_read_lock(); do { - int decrement = 1; - nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key, nfsd_file_rhash_params); if (!nf) break; - - /* If we raced with someone else unhashing, ignore it */ - if (!nfsd_file_unhash(nf)) - continue; - - /* If we can't get a reference, ignore it */ - if (!nfsd_file_get(nf)) - continue; - - /* Extra decrement if we remove from the LRU */ - if (nfsd_file_lru_remove(nf)) - ++decrement; - - /* If refcount goes to 0, then put on the dispose list */ - if (refcount_sub_and_test(decrement, &nf->nf_ref)) { - list_add(&nf->nf_lru, dispose); - trace_nfsd_file_closing(nf); - } + nfsd_file_cond_queue(nf, dispose); } while (1); rcu_read_unlock(); } @@ -928,11 +921,8 @@ __nfsd_file_cache_purge(struct net *net) nf = rhashtable_walk_next(&iter); while (!IS_ERR_OR_NULL(nf)) { - if (!net || nf->nf_net == net) { - nfsd_file_unhash(nf); - nfsd_file_lru_remove(nf); - list_add(&nf->nf_lru, &dispose); - } + if (!net || nf->nf_net == net) + nfsd_file_cond_queue(nf, &dispose); nf = rhashtable_walk_next(&iter); } @@ -1071,8 +1061,8 @@ nfsd_file_is_cached(struct inode *inode) static __be32 nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, - unsigned int may_flags, struct nfsd_file **pnf, - bool open, bool want_gc) + unsigned int may_flags, struct file *file, + struct nfsd_file **pnf, bool want_gc) { struct nfsd_file_lookup_key key = { .type = NFSD_FILE_KEY_FULL, @@ -1096,8 +1086,7 @@ retry: rcu_read_lock(); nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key, nfsd_file_rhash_params); - if (nf) - nf = nfsd_file_get(nf); + nf = nfsd_file_get(nf); rcu_read_unlock(); if (nf) { @@ -1147,8 +1136,8 @@ wait_for_construction: status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags)); out: if (status == nfs_ok) { - if (open) - this_cpu_inc(nfsd_file_acquisitions); + this_cpu_inc(nfsd_file_acquisitions); + nfsd_file_check_write_error(nf); *pnf = nf; } else { if (refcount_dec_and_test(&nf->nf_ref)) @@ -1158,20 +1147,23 @@ out: out_status: put_cred(key.cred); - if (open) - trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status); + trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status); return status; open_file: trace_nfsd_file_alloc(nf); nf->nf_mark = nfsd_file_mark_find_or_create(nf, key.inode); if (nf->nf_mark) { - if (open) { + if (file) { + get_file(file); + nf->nf_file = file; + status = nfs_ok; + trace_nfsd_file_opened(nf, status); + } else { status = nfsd_open_verified(rqstp, fhp, may_flags, &nf->nf_file); trace_nfsd_file_open(nf, status); - } else - status = nfs_ok; + } } else status = nfserr_jukebox; /* @@ -1207,7 +1199,7 @@ __be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, true); + return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, true); } /** @@ -1228,28 +1220,30 @@ __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, false); + return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, false); } /** - * nfsd_file_create - Get a struct nfsd_file, do not open + * nfsd_file_acquire_opened - Get a struct nfsd_file using existing open file * @rqstp: the RPC transaction being executed * @fhp: the NFS filehandle of the file just created * @may_flags: NFSD_MAY_ settings for the file + * @file: cached, already-open file (may be NULL) * @pnf: OUT: new or found "struct nfsd_file" object * - * The nfsd_file_object returned by this API is reference-counted - * but not garbage-collected. The object is released immediately - * one RCU grace period after the final nfsd_file_put(). + * Acquire a nfsd_file object that is not GC'ed. If one doesn't already exist, + * and @file is non-NULL, use it to instantiate a new nfsd_file instead of + * opening a new one. * * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in * network byte order is returned. */ __be32 -nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, - unsigned int may_flags, struct nfsd_file **pnf) +nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, + unsigned int may_flags, struct file *file, + struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, false, false); + return nfsd_file_do_acquire(rqstp, fhp, may_flags, file, pnf, false); } /* diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index b7efb2c3ddb1..41516a4263ea 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -60,7 +60,8 @@ __be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **nfp); __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **nfp); -__be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, - unsigned int may_flags, struct nfsd_file **nfp); +__be32 nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, + unsigned int may_flags, struct file *file, + struct nfsd_file **nfp); int nfsd_file_cache_stats_show(struct seq_file *m, void *v); #endif /* _FS_NFSD_FILECACHE_H */ diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 8c854ba3285b..ec49b200b797 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -10,6 +10,7 @@ #include <net/net_namespace.h> #include <net/netns/generic.h> +#include <linux/filelock.h> #include <linux/percpu_counter.h> #include <linux/siphash.h> @@ -195,7 +196,7 @@ struct nfsd_net { atomic_t nfsd_courtesy_clients; struct shrinker nfsd_client_shrinker; - struct delayed_work nfsd_shrinker_work; + struct work_struct nfsd_shrinker_work; }; /* Simple check to find out if a given net was properly initialized */ diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 1457f59f447a..12b2b9bc07bf 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -113,11 +113,11 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst *rqstp) inode_lock(inode); - error = set_posix_acl(&init_user_ns, fh->fh_dentry, ACL_TYPE_ACCESS, + error = set_posix_acl(&nop_mnt_idmap, fh->fh_dentry, ACL_TYPE_ACCESS, argp->acl_access); if (error) goto out_drop_lock; - error = set_posix_acl(&init_user_ns, fh->fh_dentry, ACL_TYPE_DEFAULT, + error = set_posix_acl(&nop_mnt_idmap, fh->fh_dentry, ACL_TYPE_DEFAULT, argp->acl_default); if (error) goto out_drop_lock; @@ -377,10 +377,11 @@ static const struct svc_procedure nfsd_acl_procedures2[5] = { }, }; -static unsigned int nfsd_acl_count2[ARRAY_SIZE(nfsd_acl_procedures2)]; +static DEFINE_PER_CPU_ALIGNED(unsigned long, + nfsd_acl_count2[ARRAY_SIZE(nfsd_acl_procedures2)]); const struct svc_version nfsd_acl_version2 = { .vs_vers = 2, - .vs_nproc = 5, + .vs_nproc = ARRAY_SIZE(nfsd_acl_procedures2), .vs_proc = nfsd_acl_procedures2, .vs_count = nfsd_acl_count2, .vs_dispatch = nfsd_dispatch, diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 647108138e8a..73adca47d373 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -103,11 +103,11 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst *rqstp) inode_lock(inode); - error = set_posix_acl(&init_user_ns, fh->fh_dentry, ACL_TYPE_ACCESS, + error = set_posix_acl(&nop_mnt_idmap, fh->fh_dentry, ACL_TYPE_ACCESS, argp->acl_access); if (error) goto out_drop_lock; - error = set_posix_acl(&init_user_ns, fh->fh_dentry, ACL_TYPE_DEFAULT, + error = set_posix_acl(&nop_mnt_idmap, fh->fh_dentry, ACL_TYPE_DEFAULT, argp->acl_default); out_drop_lock: @@ -266,10 +266,11 @@ static const struct svc_procedure nfsd_acl_procedures3[3] = { }, }; -static unsigned int nfsd_acl_count3[ARRAY_SIZE(nfsd_acl_procedures3)]; +static DEFINE_PER_CPU_ALIGNED(unsigned long, + nfsd_acl_count3[ARRAY_SIZE(nfsd_acl_procedures3)]); const struct svc_version nfsd_acl_version3 = { .vs_vers = 3, - .vs_nproc = 3, + .vs_nproc = ARRAY_SIZE(nfsd_acl_procedures3), .vs_proc = nfsd_acl_procedures3, .vs_count = nfsd_acl_count3, .vs_dispatch = nfsd_dispatch, diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index d01b29aba662..e6bb8eeb5bc2 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -320,7 +320,7 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, iap->ia_mode &= ~current_umask(); fh_fill_pre_attrs(fhp); - host_err = vfs_create(&init_user_ns, inode, child, iap->ia_mode, true); + host_err = vfs_create(&nop_mnt_idmap, inode, child, iap->ia_mode, true); if (host_err < 0) { status = nfserrno(host_err); goto out; @@ -1064,10 +1064,11 @@ static const struct svc_procedure nfsd_procedures3[22] = { }, }; -static unsigned int nfsd_count3[ARRAY_SIZE(nfsd_procedures3)]; +static DEFINE_PER_CPU_ALIGNED(unsigned long, + nfsd_count3[ARRAY_SIZE(nfsd_procedures3)]); const struct svc_version nfsd_version3 = { .vs_vers = 3, - .vs_nproc = 22, + .vs_nproc = ARRAY_SIZE(nfsd_procedures3), .vs_proc = nfsd_procedures3, .vs_dispatch = nfsd_dispatch, .vs_count = nfsd_count3, diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index 3564d1c6f610..e8a80052cb1b 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -323,11 +323,11 @@ nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls) if (ls->ls_recalled) goto out_unlock; - ls->ls_recalled = true; - atomic_inc(&ls->ls_stid.sc_file->fi_lo_recalls); if (list_empty(&ls->ls_layouts)) goto out_unlock; + ls->ls_recalled = true; + atomic_inc(&ls->ls_stid.sc_file->fi_lo_recalls); trace_nfsd_layout_recall(&ls->ls_stid.sc_stateid); refcount_inc(&ls->ls_stid.sc_count); diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index bd880d55f565..5ae670807449 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -937,7 +937,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * the client wants us to do more in this compound: */ if (!nfsd4_last_compound_op(rqstp)) - __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); + clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); /* check stateid */ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, @@ -1214,8 +1214,10 @@ out: return status; out_put_dst: nfsd_file_put(*dst); + *dst = NULL; out_put_src: nfsd_file_put(*src); + *src = NULL; goto out; } @@ -1293,15 +1295,15 @@ extern void nfs_sb_deactive(struct super_block *sb); * setup a work entry in the ssc delayed unmount list. */ static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr, - struct nfsd4_ssc_umount_item **retwork, struct vfsmount **ss_mnt) + struct nfsd4_ssc_umount_item **nsui) { struct nfsd4_ssc_umount_item *ni = NULL; struct nfsd4_ssc_umount_item *work = NULL; struct nfsd4_ssc_umount_item *tmp; DEFINE_WAIT(wait); + __be32 status = 0; - *ss_mnt = NULL; - *retwork = NULL; + *nsui = NULL; work = kzalloc(sizeof(*work), GFP_KERNEL); try_again: spin_lock(&nn->nfsd_ssc_lock); @@ -1318,18 +1320,19 @@ try_again: /* allow 20secs for mount/unmount for now - revisit */ if (signal_pending(current) || (schedule_timeout(20*HZ) == 0)) { + finish_wait(&nn->nfsd_ssc_waitq, &wait); kfree(work); return nfserr_eagain; } finish_wait(&nn->nfsd_ssc_waitq, &wait); goto try_again; } - *ss_mnt = ni->nsui_vfsmount; + *nsui = ni; refcount_inc(&ni->nsui_refcnt); spin_unlock(&nn->nfsd_ssc_lock); kfree(work); - /* return vfsmount in ss_mnt */ + /* return vfsmount in (*nsui)->nsui_vfsmount */ return 0; } if (work) { @@ -1337,31 +1340,32 @@ try_again: refcount_set(&work->nsui_refcnt, 2); work->nsui_busy = true; list_add_tail(&work->nsui_list, &nn->nfsd_ssc_mount_list); - *retwork = work; - } + *nsui = work; + } else + status = nfserr_resource; spin_unlock(&nn->nfsd_ssc_lock); - return 0; + return status; } -static void nfsd4_ssc_update_dul_work(struct nfsd_net *nn, - struct nfsd4_ssc_umount_item *work, struct vfsmount *ss_mnt) +static void nfsd4_ssc_update_dul(struct nfsd_net *nn, + struct nfsd4_ssc_umount_item *nsui, + struct vfsmount *ss_mnt) { - /* set nsui_vfsmount, clear busy flag and wakeup waiters */ spin_lock(&nn->nfsd_ssc_lock); - work->nsui_vfsmount = ss_mnt; - work->nsui_busy = false; + nsui->nsui_vfsmount = ss_mnt; + nsui->nsui_busy = false; wake_up_all(&nn->nfsd_ssc_waitq); spin_unlock(&nn->nfsd_ssc_lock); } -static void nfsd4_ssc_cancel_dul_work(struct nfsd_net *nn, - struct nfsd4_ssc_umount_item *work) +static void nfsd4_ssc_cancel_dul(struct nfsd_net *nn, + struct nfsd4_ssc_umount_item *nsui) { spin_lock(&nn->nfsd_ssc_lock); - list_del(&work->nsui_list); + list_del(&nsui->nsui_list); wake_up_all(&nn->nfsd_ssc_waitq); spin_unlock(&nn->nfsd_ssc_lock); - kfree(work); + kfree(nsui); } /* @@ -1369,7 +1373,7 @@ static void nfsd4_ssc_cancel_dul_work(struct nfsd_net *nn, */ static __be32 nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp, - struct vfsmount **mount) + struct nfsd4_ssc_umount_item **nsui) { struct file_system_type *type; struct vfsmount *ss_mnt; @@ -1380,7 +1384,6 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp, char *ipaddr, *dev_name, *raw_data; int len, raw_len; __be32 status = nfserr_inval; - struct nfsd4_ssc_umount_item *work = NULL; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); naddr = &nss->u.nl4_addr; @@ -1388,6 +1391,7 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp, naddr->addr_len, (struct sockaddr *)&tmp_addr, sizeof(tmp_addr)); + *nsui = NULL; if (tmp_addrlen == 0) goto out_err; @@ -1430,10 +1434,10 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp, goto out_free_rawdata; snprintf(dev_name, len + 5, "%s%s%s:/", startsep, ipaddr, endsep); - status = nfsd4_ssc_setup_dul(nn, ipaddr, &work, &ss_mnt); + status = nfsd4_ssc_setup_dul(nn, ipaddr, nsui); if (status) goto out_free_devname; - if (ss_mnt) + if ((*nsui)->nsui_vfsmount) goto out_done; /* Use an 'internal' mount: SB_KERNMOUNT -> MNT_INTERNAL */ @@ -1441,15 +1445,12 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp, module_put(type->owner); if (IS_ERR(ss_mnt)) { status = nfserr_nodev; - if (work) - nfsd4_ssc_cancel_dul_work(nn, work); + nfsd4_ssc_cancel_dul(nn, *nsui); goto out_free_devname; } - if (work) - nfsd4_ssc_update_dul_work(nn, work, ss_mnt); + nfsd4_ssc_update_dul(nn, *nsui, ss_mnt); out_done: status = 0; - *mount = ss_mnt; out_free_devname: kfree(dev_name); @@ -1473,7 +1474,7 @@ out_err: static __be32 nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, - struct nfsd4_copy *copy, struct vfsmount **mount) + struct nfsd4_copy *copy) { struct svc_fh *s_fh = NULL; stateid_t *s_stid = ©->cp_src_stateid; @@ -1486,7 +1487,7 @@ nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, if (status) goto out; - status = nfsd4_interssc_connect(copy->cp_src, rqstp, mount); + status = nfsd4_interssc_connect(copy->cp_src, rqstp, ©->ss_nsui); if (status) goto out; @@ -1504,45 +1505,26 @@ out: } static void -nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct file *filp, +nfsd4_cleanup_inter_ssc(struct nfsd4_ssc_umount_item *nsui, struct file *filp, struct nfsd_file *dst) { - bool found = false; - long timeout; - struct nfsd4_ssc_umount_item *tmp; - struct nfsd4_ssc_umount_item *ni = NULL; struct nfsd_net *nn = net_generic(dst->nf_net, nfsd_net_id); + long timeout = msecs_to_jiffies(nfsd4_ssc_umount_timeout); nfs42_ssc_close(filp); - nfsd_file_put(dst); fput(filp); - if (!nn) { - mntput(ss_mnt); - return; - } spin_lock(&nn->nfsd_ssc_lock); - timeout = msecs_to_jiffies(nfsd4_ssc_umount_timeout); - list_for_each_entry_safe(ni, tmp, &nn->nfsd_ssc_mount_list, nsui_list) { - if (ni->nsui_vfsmount->mnt_sb == ss_mnt->mnt_sb) { - list_del(&ni->nsui_list); - /* - * vfsmount can be shared by multiple exports, - * decrement refcnt. If the count drops to 1 it - * will be unmounted when nsui_expire expires. - */ - refcount_dec(&ni->nsui_refcnt); - ni->nsui_expire = jiffies + timeout; - list_add_tail(&ni->nsui_list, &nn->nfsd_ssc_mount_list); - found = true; - break; - } - } + list_del(&nsui->nsui_list); + /* + * vfsmount can be shared by multiple exports, + * decrement refcnt. If the count drops to 1 it + * will be unmounted when nsui_expire expires. + */ + refcount_dec(&nsui->nsui_refcnt); + nsui->nsui_expire = jiffies + timeout; + list_add_tail(&nsui->nsui_list, &nn->nfsd_ssc_mount_list); spin_unlock(&nn->nfsd_ssc_lock); - if (!found) { - mntput(ss_mnt); - return; - } } #else /* CONFIG_NFSD_V4_2_INTER_SSC */ @@ -1550,15 +1532,13 @@ nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct file *filp, static __be32 nfsd4_setup_inter_ssc(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, - struct nfsd4_copy *copy, - struct vfsmount **mount) + struct nfsd4_copy *copy) { - *mount = NULL; return nfserr_inval; } static void -nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct file *filp, +nfsd4_cleanup_inter_ssc(struct nfsd4_ssc_umount_item *nsui, struct file *filp, struct nfsd_file *dst) { } @@ -1581,13 +1561,6 @@ nfsd4_setup_intra_ssc(struct svc_rqst *rqstp, ©->nf_dst); } -static void -nfsd4_cleanup_intra_ssc(struct nfsd_file *src, struct nfsd_file *dst) -{ - nfsd_file_put(src); - nfsd_file_put(dst); -} - static void nfsd4_cb_offload_release(struct nfsd4_callback *cb) { struct nfsd4_cb_offload *cbo = @@ -1699,18 +1672,27 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) memcpy(dst->cp_src, src->cp_src, sizeof(struct nl4_server)); memcpy(&dst->stateid, &src->stateid, sizeof(src->stateid)); memcpy(&dst->c_fh, &src->c_fh, sizeof(src->c_fh)); - dst->ss_mnt = src->ss_mnt; + dst->ss_nsui = src->ss_nsui; +} + +static void release_copy_files(struct nfsd4_copy *copy) +{ + if (copy->nf_src) + nfsd_file_put(copy->nf_src); + if (copy->nf_dst) + nfsd_file_put(copy->nf_dst); } static void cleanup_async_copy(struct nfsd4_copy *copy) { nfs4_free_copy_state(copy); - nfsd_file_put(copy->nf_dst); - if (!nfsd4_ssc_is_inter(copy)) - nfsd_file_put(copy->nf_src); - spin_lock(©->cp_clp->async_lock); - list_del(©->copies); - spin_unlock(©->cp_clp->async_lock); + release_copy_files(copy); + if (copy->cp_clp) { + spin_lock(©->cp_clp->async_lock); + if (!list_empty(©->copies)) + list_del_init(©->copies); + spin_unlock(©->cp_clp->async_lock); + } nfs4_put_copy(copy); } @@ -1748,8 +1730,8 @@ static int nfsd4_do_async_copy(void *data) if (nfsd4_ssc_is_inter(copy)) { struct file *filp; - filp = nfs42_ssc_open(copy->ss_mnt, ©->c_fh, - ©->stateid); + filp = nfs42_ssc_open(copy->ss_nsui->nsui_vfsmount, + ©->c_fh, ©->stateid); if (IS_ERR(filp)) { switch (PTR_ERR(filp)) { case -EBADF: @@ -1763,11 +1745,10 @@ static int nfsd4_do_async_copy(void *data) } nfserr = nfsd4_do_copy(copy, filp, copy->nf_dst->nf_file, false); - nfsd4_cleanup_inter_ssc(copy->ss_mnt, filp, copy->nf_dst); + nfsd4_cleanup_inter_ssc(copy->ss_nsui, filp, copy->nf_dst); } else { nfserr = nfsd4_do_copy(copy, copy->nf_src->nf_file, copy->nf_dst->nf_file, false); - nfsd4_cleanup_intra_ssc(copy->nf_src, copy->nf_dst); } do_callback: @@ -1789,8 +1770,7 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfserr_notsupp; goto out; } - status = nfsd4_setup_inter_ssc(rqstp, cstate, copy, - ©->ss_mnt); + status = nfsd4_setup_inter_ssc(rqstp, cstate, copy); if (status) return nfserr_offload_denied; } else { @@ -1809,12 +1789,13 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); if (!async_copy) goto out_err; + INIT_LIST_HEAD(&async_copy->copies); + refcount_set(&async_copy->refcount, 1); async_copy->cp_src = kmalloc(sizeof(*async_copy->cp_src), GFP_KERNEL); if (!async_copy->cp_src) goto out_err; if (!nfs4_init_copy_state(nn, copy)) goto out_err; - refcount_set(&async_copy->refcount, 1); memcpy(©->cp_res.cb_stateid, ©->cp_stateid.cs_stid, sizeof(copy->cp_res.cb_stateid)); dup_copy_fields(copy, async_copy); @@ -1831,38 +1812,53 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } else { status = nfsd4_do_copy(copy, copy->nf_src->nf_file, copy->nf_dst->nf_file, true); - nfsd4_cleanup_intra_ssc(copy->nf_src, copy->nf_dst); } out: + release_copy_files(copy); return status; out_err: + if (nfsd4_ssc_is_inter(copy)) { + /* + * Source's vfsmount of inter-copy will be unmounted + * by the laundromat. Use copy instead of async_copy + * since async_copy->ss_nsui might not be set yet. + */ + refcount_dec(©->ss_nsui->nsui_refcnt); + } if (async_copy) cleanup_async_copy(async_copy); status = nfserrno(-ENOMEM); - /* - * source's vfsmount of inter-copy will be unmounted - * by the laundromat - */ goto out; } -struct nfsd4_copy * -find_async_copy(struct nfs4_client *clp, stateid_t *stateid) +static struct nfsd4_copy * +find_async_copy_locked(struct nfs4_client *clp, stateid_t *stateid) { struct nfsd4_copy *copy; - spin_lock(&clp->async_lock); + lockdep_assert_held(&clp->async_lock); + list_for_each_entry(copy, &clp->async_copies, copies) { if (memcmp(©->cp_stateid.cs_stid, stateid, NFS4_STATEID_SIZE)) continue; - refcount_inc(©->refcount); - spin_unlock(&clp->async_lock); return copy; } - spin_unlock(&clp->async_lock); return NULL; } +static struct nfsd4_copy * +find_async_copy(struct nfs4_client *clp, stateid_t *stateid) +{ + struct nfsd4_copy *copy; + + spin_lock(&clp->async_lock); + copy = find_async_copy_locked(clp, stateid); + if (copy) + refcount_inc(©->refcount); + spin_unlock(&clp->async_lock); + return copy; +} + static __be32 nfsd4_offload_cancel(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, @@ -1947,22 +1943,24 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfsd_file_put(nf); return status; } + static __be32 nfsd4_offload_status(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_offload_status *os = &u->offload_status; - __be32 status = 0; + __be32 status = nfs_ok; struct nfsd4_copy *copy; struct nfs4_client *clp = cstate->clp; - copy = find_async_copy(clp, &os->stateid); - if (copy) { + spin_lock(&clp->async_lock); + copy = find_async_copy_locked(clp, &os->stateid); + if (copy) os->count = copy->cp_res.wr_bytes_written; - nfs4_put_copy(copy); - } else + else status = nfserr_bad_stateid; + spin_unlock(&clp->async_lock); return status; } @@ -2607,12 +2605,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) cstate->minorversion = args->minorversion; fh_init(current_fh, NFS4_FHSIZE); fh_init(save_fh, NFS4_FHSIZE); - /* * Don't use the deferral mechanism for NFSv4; compounds make it * too hard to avoid non-idempotency problems. */ - __clear_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); + clear_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); /* * According to RFC3010, this takes precedence over all other errors. @@ -2734,7 +2731,7 @@ encode_op: out: cstate->status = status; /* Reset deferral mechanism for RPC deferrals */ - __set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); + set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); return rpc_success; } @@ -3619,12 +3616,13 @@ static const struct svc_procedure nfsd_procedures4[2] = { }, }; -static unsigned int nfsd_count3[ARRAY_SIZE(nfsd_procedures4)]; +static DEFINE_PER_CPU_ALIGNED(unsigned long, + nfsd_count4[ARRAY_SIZE(nfsd_procedures4)]); const struct svc_version nfsd_version4 = { .vs_vers = 4, - .vs_nproc = 2, + .vs_nproc = ARRAY_SIZE(nfsd_procedures4), .vs_proc = nfsd_procedures4, - .vs_count = nfsd_count3, + .vs_count = nfsd_count4, .vs_dispatch = nfsd_dispatch, .vs_xdrsize = NFS4_SVC_XDRSIZE, .vs_rpcb_optnl = true, diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 78b8cd9651d5..3509e73abe1f 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -233,7 +233,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) * as well be forgiving and just succeed silently. */ goto out_put; - status = vfs_mkdir(&init_user_ns, d_inode(dir), dentry, S_IRWXU); + status = vfs_mkdir(&nop_mnt_idmap, d_inode(dir), dentry, S_IRWXU); out_put: dput(dentry); out_unlock: @@ -353,7 +353,7 @@ nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn) status = -ENOENT; if (d_really_is_negative(dentry)) goto out; - status = vfs_rmdir(&init_user_ns, d_inode(dir), dentry); + status = vfs_rmdir(&nop_mnt_idmap, d_inode(dir), dentry); out: dput(dentry); out_unlock: @@ -443,7 +443,7 @@ purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn) if (nfs4_has_reclaimed_state(name, nn)) goto out_free; - status = vfs_rmdir(&init_user_ns, d_inode(parent), child); + status = vfs_rmdir(&nop_mnt_idmap, d_inode(parent), child); if (status) printk("failed to remove client recovery directory %pd\n", child); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 7b2ee535ade8..6e61fa3acaf1 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -600,23 +600,15 @@ put_nfs4_file(struct nfs4_file *fi) } static struct nfsd_file * -__nfs4_get_fd(struct nfs4_file *f, int oflag) -{ - if (f->fi_fds[oflag]) - return nfsd_file_get(f->fi_fds[oflag]); - return NULL; -} - -static struct nfsd_file * find_writeable_file_locked(struct nfs4_file *f) { struct nfsd_file *ret; lockdep_assert_held(&f->fi_lock); - ret = __nfs4_get_fd(f, O_WRONLY); + ret = nfsd_file_get(f->fi_fds[O_WRONLY]); if (!ret) - ret = __nfs4_get_fd(f, O_RDWR); + ret = nfsd_file_get(f->fi_fds[O_RDWR]); return ret; } @@ -639,9 +631,9 @@ find_readable_file_locked(struct nfs4_file *f) lockdep_assert_held(&f->fi_lock); - ret = __nfs4_get_fd(f, O_RDONLY); + ret = nfsd_file_get(f->fi_fds[O_RDONLY]); if (!ret) - ret = __nfs4_get_fd(f, O_RDWR); + ret = nfsd_file_get(f->fi_fds[O_RDWR]); return ret; } @@ -665,11 +657,11 @@ find_any_file(struct nfs4_file *f) if (!f) return NULL; spin_lock(&f->fi_lock); - ret = __nfs4_get_fd(f, O_RDWR); + ret = nfsd_file_get(f->fi_fds[O_RDWR]); if (!ret) { - ret = __nfs4_get_fd(f, O_WRONLY); + ret = nfsd_file_get(f->fi_fds[O_WRONLY]); if (!ret) - ret = __nfs4_get_fd(f, O_RDONLY); + ret = nfsd_file_get(f->fi_fds[O_RDONLY]); } spin_unlock(&f->fi_lock); return ret; @@ -688,15 +680,6 @@ static struct nfsd_file *find_any_file_locked(struct nfs4_file *f) return NULL; } -static struct nfsd_file *find_deleg_file_locked(struct nfs4_file *f) -{ - lockdep_assert_held(&f->fi_lock); - - if (f->fi_deleg_file) - return f->fi_deleg_file; - return NULL; -} - static atomic_long_t num_delegations; unsigned long max_delegations; @@ -992,7 +975,6 @@ static int nfs4_init_cp_state(struct nfsd_net *nn, copy_stateid_t *stid, stid->cs_stid.si_opaque.so_clid.cl_boot = (u32)nn->boot_time; stid->cs_stid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id; - stid->cs_type = cs_type; idr_preload(GFP_KERNEL); spin_lock(&nn->s2s_cp_lock); @@ -1003,6 +985,7 @@ static int nfs4_init_cp_state(struct nfsd_net *nn, copy_stateid_t *stid, idr_preload_end(); if (new_id < 0) return 0; + stid->cs_type = cs_type; return 1; } @@ -1036,7 +1019,8 @@ void nfs4_free_copy_state(struct nfsd4_copy *copy) { struct nfsd_net *nn; - WARN_ON_ONCE(copy->cp_stateid.cs_type != NFS4_COPY_STID); + if (copy->cp_stateid.cs_type != NFS4_COPY_STID) + return; nn = net_generic(copy->cp_clp->net, nfsd_net_id); spin_lock(&nn->s2s_cp_lock); idr_remove(&nn->s2s_cp_stateids, @@ -2705,7 +2689,7 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st) ds = delegstateid(st); nf = st->sc_file; spin_lock(&nf->fi_lock); - file = find_deleg_file_locked(nf); + file = nf->fi_deleg_file; if (!file) goto out; @@ -4411,7 +4395,7 @@ nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc) if (!count) count = atomic_long_read(&num_delegations); if (count) - mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0); + queue_work(laundry_wq, &nn->nfsd_shrinker_work); return (unsigned long)count; } @@ -4421,7 +4405,7 @@ nfsd4_state_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc) return SHRINK_STOP; } -int +void nfsd4_init_leases_net(struct nfsd_net *nn) { struct sysinfo si; @@ -4443,16 +4427,6 @@ nfsd4_init_leases_net(struct nfsd_net *nn) nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB); atomic_set(&nn->nfsd_courtesy_clients, 0); - nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan; - nn->nfsd_client_shrinker.count_objects = nfsd4_state_shrinker_count; - nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS; - return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client"); -} - -void -nfsd4_leases_net_shutdown(struct nfsd_net *nn) -{ - unregister_shrinker(&nn->nfsd_client_shrinker); } static void init_nfs4_replay(struct nfs4_replay *rp) @@ -5262,18 +5236,10 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, if (!fp->fi_fds[oflag]) { spin_unlock(&fp->fi_lock); - if (!open->op_filp) { - status = nfsd_file_acquire(rqstp, cur_fh, access, &nf); - if (status != nfs_ok) - goto out_put_access; - } else { - status = nfsd_file_create(rqstp, cur_fh, access, &nf); - if (status != nfs_ok) - goto out_put_access; - nf->nf_file = open->op_filp; - open->op_filp = NULL; - trace_nfsd_file_create(rqstp, access, nf); - } + status = nfsd_file_acquire_opened(rqstp, cur_fh, access, + open->op_filp, &nf); + if (status != nfs_ok) + goto out_put_access; spin_lock(&fp->fi_lock); if (!fp->fi_fds[oflag]) { @@ -5316,16 +5282,17 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, /* test and set deny mode */ spin_lock(&fp->fi_lock); status = nfs4_file_check_deny(fp, open->op_share_deny); - if (status == nfs_ok) { - if (status != nfserr_share_denied) { - set_deny(open->op_share_deny, stp); - fp->fi_share_deny |= - (open->op_share_deny & NFS4_SHARE_DENY_BOTH); - } else { - if (nfs4_resolve_deny_conflicts_locked(fp, false, - stp, open->op_share_deny, false)) - status = nfserr_jukebox; - } + switch (status) { + case nfs_ok: + set_deny(open->op_share_deny, stp); + fp->fi_share_deny |= + (open->op_share_deny & NFS4_SHARE_DENY_BOTH); + break; + case nfserr_share_denied: + if (nfs4_resolve_deny_conflicts_locked(fp, false, + stp, open->op_share_deny, false)) + status = nfserr_jukebox; + break; } spin_unlock(&fp->fi_lock); @@ -5374,7 +5341,7 @@ static int nfsd4_check_conflicting_opens(struct nfs4_client *clp, { struct nfs4_ol_stateid *st; struct file *f = fp->fi_deleg_file->nf_file; - struct inode *ino = locks_inode(f); + struct inode *ino = file_inode(f); int writes; writes = atomic_read(&ino->i_writecount); @@ -5456,6 +5423,23 @@ nfsd4_verify_deleg_dentry(struct nfsd4_open *open, struct nfs4_file *fp, return 0; } +/* + * We avoid breaking delegations held by a client due to its own activity, but + * clearing setuid/setgid bits on a write is an implicit activity and the client + * may not notice and continue using the old mode. Avoid giving out a delegation + * on setuid/setgid files when the client is requesting an open for write. + */ +static int +nfsd4_verify_setuid_write(struct nfsd4_open *open, struct nfsd_file *nf) +{ + struct inode *inode = file_inode(nf->nf_file); + + if ((open->op_share_access & NFS4_SHARE_ACCESS_WRITE) && + (inode->i_mode & (S_ISUID|S_ISGID))) + return -EAGAIN; + return 0; +} + static struct nfs4_delegation * nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, struct svc_fh *parent) @@ -5489,6 +5473,8 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, spin_lock(&fp->fi_lock); if (nfs4_delegation_exists(clp, fp)) status = -EAGAIN; + else if (nfsd4_verify_setuid_write(open, nf)) + status = -EAGAIN; else if (!fp->fi_deleg_file) { fp->fi_deleg_file = nf; /* increment early to prevent fi_deleg_file from being @@ -5529,6 +5515,14 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, if (status) goto out_unlock; + /* + * Now that the deleg is set, check again to ensure that nothing + * raced in and changed the mode while we weren't lookng. + */ + status = nfsd4_verify_setuid_write(open, fp->fi_deleg_file); + if (status) + goto out_unlock; + spin_lock(&state_lock); spin_lock(&fp->fi_lock); if (fp->fi_had_conflict) @@ -6243,8 +6237,7 @@ deleg_reaper(struct nfsd_net *nn) static void nfsd4_state_shrinker_worker(struct work_struct *work) { - struct delayed_work *dwork = to_delayed_work(work); - struct nfsd_net *nn = container_of(dwork, struct nfsd_net, + struct nfsd_net *nn = container_of(work, struct nfsd_net, nfsd_shrinker_work); courtesy_client_reaper(nn); @@ -6425,23 +6418,26 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, static struct nfsd_file * nfs4_find_file(struct nfs4_stid *s, int flags) { + struct nfsd_file *ret = NULL; + if (!s) return NULL; switch (s->sc_type) { case NFS4_DELEG_STID: - if (WARN_ON_ONCE(!s->sc_file->fi_deleg_file)) - return NULL; - return nfsd_file_get(s->sc_file->fi_deleg_file); + spin_lock(&s->sc_file->fi_lock); + ret = nfsd_file_get(s->sc_file->fi_deleg_file); + spin_unlock(&s->sc_file->fi_lock); + break; case NFS4_OPEN_STID: case NFS4_LOCK_STID: if (flags & RD_STATE) - return find_readable_file(s->sc_file); + ret = find_readable_file(s->sc_file); else - return find_writeable_file(s->sc_file); + ret = find_writeable_file(s->sc_file); } - return NULL; + return ret; } static __be32 @@ -6566,8 +6562,19 @@ void nfs4_put_cpntf_state(struct nfsd_net *nn, struct nfs4_cpntf_state *cps) spin_unlock(&nn->s2s_cp_lock); } -/* - * Checks for stateid operations +/** + * nfs4_preprocess_stateid_op - find and prep stateid for an operation + * @rqstp: incoming request from client + * @cstate: current compound state + * @fhp: filehandle associated with requested stateid + * @stateid: stateid (provided by client) + * @flags: flags describing type of operation to be done + * @nfp: optional nfsd_file return pointer (may be NULL) + * @cstid: optional returned nfs4_stid pointer (may be NULL) + * + * Given info from the client, look up a nfs4_stid for the operation. On + * success, it returns a reference to the nfs4_stid and/or the nfsd_file + * associated with it. */ __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, @@ -6756,8 +6763,18 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_ return status; } -/* - * Checks for sequence id mutating operations. +/** + * nfs4_preprocess_seqid_op - find and prep an ol_stateid for a seqid-morphing op + * @cstate: compund state + * @seqid: seqid (provided by client) + * @stateid: stateid (provided by client) + * @typemask: mask of allowable types for this operation + * @stpp: return pointer for the stateid found + * @nn: net namespace for request + * + * Given a stateid+seqid from a client, look up an nfs4_ol_stateid and + * return it in @stpp. On a nfs_ok return, the returned stateid will + * have its st_mutex locked. */ static __be32 nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, @@ -7828,7 +7845,7 @@ check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) return status; } - inode = locks_inode(nf->nf_file); + inode = file_inode(nf->nf_file); flctx = locks_inode_context(inode); if (flctx && !list_empty_careful(&flctx->flc_posix)) { @@ -8074,11 +8091,20 @@ static int nfs4_state_create_net(struct net *net) INIT_LIST_HEAD(&nn->blocked_locks_lru); INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main); - INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker); + INIT_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker); get_net(net); + nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan; + nn->nfsd_client_shrinker.count_objects = nfsd4_state_shrinker_count; + nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS; + + if (register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client")) + goto err_shrinker; return 0; +err_shrinker: + put_net(net); + kfree(nn->sessionid_hashtbl); err_sessionid: kfree(nn->unconf_id_hashtbl); err_unconf_id: @@ -8171,6 +8197,8 @@ nfs4_state_shutdown_net(struct net *net) struct list_head *pos, *next, reaplist; struct nfsd_net *nn = net_generic(net, nfsd_net_id); + unregister_shrinker(&nn->nfsd_client_shrinker); + cancel_work(&nn->nfsd_shrinker_work); cancel_delayed_work_sync(&nn->laundromat_work); locks_end_grace(&nn->nfsd4_manager); @@ -8190,7 +8218,6 @@ nfs4_state_shutdown_net(struct net *net) nfsd4_client_tracking_exit(net); nfs4_state_destroy_net(net); - rhltable_destroy(&nfs4_file_rhltable); #ifdef CONFIG_NFSD_V4_2_INTER_SSC nfsd4_ssc_shutdown_umount(nn); #endif @@ -8200,6 +8227,7 @@ void nfs4_state_shutdown(void) { nfsd4_destroy_callback_queue(); + rhltable_destroy(&nfs4_file_rhltable); } static void diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 2b4ae858c89b..e12e5a4ad502 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2523,7 +2523,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE; if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack) - __clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags); + clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags); return true; } @@ -2965,7 +2965,9 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, goto out; } - err = vfs_getattr(&path, &stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); + err = vfs_getattr(&path, &stat, + STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE, + AT_STATX_SYNC_AS_STAT); if (err) goto out_nfserr; if (!(stat.result_mask & STATX_BTIME)) @@ -3629,6 +3631,17 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, case nfserr_noent: xdr_truncate_encode(xdr, start_offset); goto skip_entry; + case nfserr_jukebox: + /* + * The pseudoroot should only display dentries that lead to + * exports. If we get EJUKEBOX here, then we can't tell whether + * this entry should be included. Just fail the whole READDIR + * with NFS4ERR_DELAY in that case, and hope that the situation + * will resolve itself by the client's next attempt. + */ + if (cd->rd_fhp->fh_export->ex_flags & NFSEXP_V4ROOT) + goto fail; + fallthrough; default: /* * If the client requested the RDATTR_ERROR attribute, diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 3e64a3d50a1c..041faa13b852 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -488,7 +488,7 @@ found_entry: case RC_NOCACHE: break; case RC_REPLSTAT: - svc_putu32(&rqstp->rq_res.head[0], rp->c_replstat); + xdr_stream_encode_be32(&rqstp->rq_res_stream, rp->c_replstat); rtn = RC_REPLY; break; case RC_REPLBUFF: @@ -509,7 +509,7 @@ out_trace: * nfsd_cache_update - Update an entry in the duplicate reply cache. * @rqstp: svc_rqst with a finished Reply * @cachetype: which cache to update - * @statp: Reply's status code + * @statp: pointer to Reply's NFS status code, or NULL * * This is called from nfsd_dispatch when the procedure has been * executed and the complete reply is in rqstp->rq_res. diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index d1e581a60480..7b8f17ee5224 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -14,7 +14,6 @@ #include <linux/lockd/lockd.h> #include <linux/sunrpc/addr.h> #include <linux/sunrpc/gss_api.h> -#include <linux/sunrpc/gss_krb5_enctypes.h> #include <linux/sunrpc/rpc_pipe_fs.h> #include <linux/module.h> #include <linux/fsnotify.h> @@ -47,7 +46,6 @@ enum { NFSD_MaxBlkSize, NFSD_MaxConnections, NFSD_Filecache, - NFSD_SupportedEnctypes, /* * The below MUST come last. Otherwise we leave a hole in nfsd_files[] * with !CONFIG_NFSD_V4 and simple_fill_super() goes oops @@ -187,16 +185,6 @@ static int export_features_show(struct seq_file *m, void *v) DEFINE_SHOW_ATTRIBUTE(export_features); -#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE) -static int supported_enctypes_show(struct seq_file *m, void *v) -{ - seq_printf(m, KRB5_SUPPORTED_ENCTYPES); - return 0; -} - -DEFINE_SHOW_ATTRIBUTE(supported_enctypes); -#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */ - static const struct file_operations pool_stats_operations = { .open = nfsd_pool_stats_open, .read = seq_read, @@ -1150,6 +1138,9 @@ static struct inode *nfsd_get_inode(struct super_block *sb, umode_t mode) inode->i_op = &simple_dir_inode_operations; inc_nlink(inode); break; + case S_IFLNK: + inode->i_op = &simple_symlink_inode_operations; + break; default: break; } @@ -1195,6 +1186,54 @@ out_err: goto out; } +#if IS_ENABLED(CONFIG_SUNRPC_GSS) +static int __nfsd_symlink(struct inode *dir, struct dentry *dentry, + umode_t mode, const char *content) +{ + struct inode *inode; + + inode = nfsd_get_inode(dir->i_sb, mode); + if (!inode) + return -ENOMEM; + + inode->i_link = (char *)content; + inode->i_size = strlen(content); + + d_add(dentry, inode); + inc_nlink(dir); + fsnotify_create(dir, dentry); + return 0; +} + +/* + * @content is assumed to be a NUL-terminated string that lives + * longer than the symlink itself. + */ +static void nfsd_symlink(struct dentry *parent, const char *name, + const char *content) +{ + struct inode *dir = parent->d_inode; + struct dentry *dentry; + int ret; + + inode_lock(dir); + dentry = d_alloc_name(parent, name); + if (!dentry) + goto out; + ret = __nfsd_symlink(d_inode(parent), dentry, S_IFLNK | 0777, content); + if (ret) + dput(dentry); +out: + inode_unlock(dir); +} +#else +static inline void nfsd_symlink(struct dentry *parent, const char *name, + const char *content) +{ +} + +#endif + static void clear_ncl(struct inode *inode) { struct nfsdfs_client *ncl = inode->i_private; @@ -1355,10 +1394,6 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc) [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_Filecache] = {"filecache", &nfsd_file_cache_stats_fops, S_IRUGO}, -#if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE) - [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", - &supported_enctypes_fops, S_IRUGO}, -#endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */ #ifdef CONFIG_NFSD_V4 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR}, @@ -1371,6 +1406,8 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc) ret = simple_fill_super(sb, 0x6e667364, nfsd_files); if (ret) return ret; + nfsd_symlink(sb->s_root, "supported_krb5_enctypes", + "/proc/net/rpc/gss_krb5_enctypes"); dentry = nfsd_mkdir(sb->s_root, NULL, "clients"); if (IS_ERR(dentry)) return PTR_ERR(dentry); @@ -1457,21 +1494,12 @@ static __net_init int nfsd_init_net(struct net *net) goto out_idmap_error; nn->nfsd_versions = NULL; nn->nfsd4_minorversions = NULL; - retval = nfsd4_init_leases_net(nn); - if (retval) - goto out_drc_error; - retval = nfsd_reply_cache_init(nn); - if (retval) - goto out_cache_error; + nfsd4_init_leases_net(nn); get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key)); seqlock_init(&nn->writeverf_lock); return 0; -out_cache_error: - nfsd4_leases_net_shutdown(nn); -out_drc_error: - nfsd_idmap_shutdown(net); out_idmap_error: nfsd_export_shutdown(net); out_export_error: @@ -1480,13 +1508,9 @@ out_export_error: static __net_exit void nfsd_exit_net(struct net *net) { - struct nfsd_net *nn = net_generic(net, nfsd_net_id); - - nfsd_reply_cache_shutdown(nn); nfsd_idmap_shutdown(net); nfsd_export_shutdown(net); nfsd_netns_free_versions(net_generic(net, nfsd_net_id)); - nfsd4_leases_net_shutdown(nn); } static struct pernet_operations nfsd_net_ops = { diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 93b42ef9ed91..d88498f8b275 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -86,7 +86,7 @@ bool nfssvc_encode_voidres(struct svc_rqst *rqstp, * Function prototypes. */ int nfsd_svc(int nrservs, struct net *net, const struct cred *cred); -int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp); +int nfsd_dispatch(struct svc_rqst *rqstp); int nfsd_nrthreads(struct net *); int nfsd_nrpools(struct net *); @@ -504,8 +504,7 @@ extern void unregister_cld_notifier(void); extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn); #endif -extern int nfsd4_init_leases_net(struct nfsd_net *nn); -extern void nfsd4_leases_net_shutdown(struct nfsd_net *nn); +extern void nfsd4_init_leases_net(struct nfsd_net *nn); #else /* CONFIG_NFSD_V4 */ static inline int nfsd4_is_junction(struct dentry *dentry) @@ -513,8 +512,7 @@ static inline int nfsd4_is_junction(struct dentry *dentry) return 0; } -static inline int nfsd4_init_leases_net(struct nfsd_net *nn) { return 0; }; -static inline void nfsd4_leases_net_shutdown(struct nfsd_net *nn) {}; +static inline void nfsd4_init_leases_net(struct nfsd_net *nn) { }; #define register_cld_notifier() 0 #define unregister_cld_notifier() do { } while(0) diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 8c52b6c9d31a..ccd8485fee04 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -40,7 +40,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry) /* make sure parents give x permission to user */ int err; parent = dget_parent(tdentry); - err = inode_permission(&init_user_ns, + err = inode_permission(&nop_mnt_idmap, d_inode(parent), MAY_EXEC); if (err < 0) { dput(parent); @@ -628,6 +628,10 @@ void fh_fill_pre_attrs(struct svc_fh *fhp) stat.mtime = inode->i_mtime; stat.ctime = inode->i_ctime; stat.size = inode->i_size; + if (v4 && IS_I_VERSION(inode)) { + stat.change_cookie = inode_query_iversion(inode); + stat.result_mask |= STATX_CHANGE_COOKIE; + } } if (v4) fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode); @@ -659,6 +663,10 @@ void fh_fill_post_attrs(struct svc_fh *fhp) if (err) { fhp->fh_post_saved = false; fhp->fh_post_attr.ctime = inode->i_ctime; + if (v4 && IS_I_VERSION(inode)) { + fhp->fh_post_attr.change_cookie = inode_query_iversion(inode); + fhp->fh_post_attr.result_mask |= STATX_CHANGE_COOKIE; + } } else fhp->fh_post_saved = true; if (v4) @@ -748,3 +756,37 @@ enum fsid_source fsid_source(const struct svc_fh *fhp) return FSIDSOURCE_UUID; return FSIDSOURCE_DEV; } + +/* + * We could use i_version alone as the change attribute. However, i_version + * can go backwards on a regular file after an unclean shutdown. On its own + * that doesn't necessarily cause a problem, but if i_version goes backwards + * and then is incremented again it could reuse a value that was previously + * used before boot, and a client who queried the two values might incorrectly + * assume nothing changed. + * + * By using both ctime and the i_version counter we guarantee that as long as + * time doesn't go backwards we never reuse an old value. If the filesystem + * advertises STATX_ATTR_CHANGE_MONOTONIC, then this mitigation is not + * needed. + * + * We only need to do this for regular files as well. For directories, we + * assume that the new change attr is always logged to stable storage in some + * fashion before the results can be seen. + */ +u64 nfsd4_change_attribute(struct kstat *stat, struct inode *inode) +{ + u64 chattr; + + if (stat->result_mask & STATX_CHANGE_COOKIE) { + chattr = stat->change_cookie; + if (S_ISREG(inode->i_mode) && + !(stat->attributes & STATX_ATTR_CHANGE_MONOTONIC)) { + chattr += (u64)stat->ctime.tv_sec << 30; + chattr += stat->ctime.tv_nsec; + } + } else { + chattr = time_to_chattr(&stat->ctime); + } + return chattr; +} diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 513e028b0bbe..4e0ecf0ae2cf 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -293,34 +293,7 @@ static inline void fh_clear_pre_post_attrs(struct svc_fh *fhp) fhp->fh_pre_saved = false; } -/* - * We could use i_version alone as the change attribute. However, - * i_version can go backwards after a reboot. On its own that doesn't - * necessarily cause a problem, but if i_version goes backwards and then - * is incremented again it could reuse a value that was previously used - * before boot, and a client who queried the two values might - * incorrectly assume nothing changed. - * - * By using both ctime and the i_version counter we guarantee that as - * long as time doesn't go backwards we never reuse an old value. - */ -static inline u64 nfsd4_change_attribute(struct kstat *stat, - struct inode *inode) -{ - if (inode->i_sb->s_export_op->fetch_iversion) - return inode->i_sb->s_export_op->fetch_iversion(inode); - else if (IS_I_VERSION(inode)) { - u64 chattr; - - chattr = stat->ctime.tv_sec; - chattr <<= 30; - chattr += stat->ctime.tv_nsec; - chattr += inode_query_iversion(inode); - return chattr; - } else - return time_to_chattr(&stat->ctime); -} - +u64 nfsd4_change_attribute(struct kstat *stat, struct inode *inode); extern void fh_fill_pre_attrs(struct svc_fh *fhp); extern void fh_fill_post_attrs(struct svc_fh *fhp); extern void fh_fill_both_attrs(struct svc_fh *fhp); diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index a5570cf75f3f..c37195572fd0 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -93,7 +93,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp) if (delta < 0) delta = -delta; if (delta < MAX_TOUCH_TIME_ERROR && - setattr_prepare(&init_user_ns, fhp->fh_dentry, iap) != 0) { + setattr_prepare(&nop_mnt_idmap, fhp->fh_dentry, iap) != 0) { /* * Turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME. * This will cause notify_change to set these times @@ -211,7 +211,7 @@ nfsd_proc_read(struct svc_rqst *rqstp) if (resp->status == nfs_ok) resp->status = fh_getattr(&resp->fh, &resp->stat); else if (resp->status == nfserr_jukebox) - __set_bit(RQ_DROPME, &rqstp->rq_flags); + set_bit(RQ_DROPME, &rqstp->rq_flags); return rpc_success; } @@ -246,7 +246,7 @@ nfsd_proc_write(struct svc_rqst *rqstp) if (resp->status == nfs_ok) resp->status = fh_getattr(&resp->fh, &resp->stat); else if (resp->status == nfserr_jukebox) - __set_bit(RQ_DROPME, &rqstp->rq_flags); + set_bit(RQ_DROPME, &rqstp->rq_flags); return rpc_success; } @@ -838,11 +838,11 @@ static const struct svc_procedure nfsd_procedures2[18] = { }, }; - -static unsigned int nfsd_count2[ARRAY_SIZE(nfsd_procedures2)]; +static DEFINE_PER_CPU_ALIGNED(unsigned long, + nfsd_count2[ARRAY_SIZE(nfsd_procedures2)]); const struct svc_version nfsd_version2 = { .vs_vers = 2, - .vs_nproc = 18, + .vs_nproc = ARRAY_SIZE(nfsd_procedures2), .vs_proc = nfsd_procedures2, .vs_count = nfsd_count2, .vs_dispatch = nfsd_dispatch, diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 56fba1cba3af..9c7b1ef5be40 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -363,7 +363,7 @@ void nfsd_copy_write_verifier(__be32 verf[2], struct nfsd_net *nn) do { read_seqbegin_or_lock(&nn->writeverf_lock, &seq); - memcpy(verf, nn->writeverf, sizeof(*verf)); + memcpy(verf, nn->writeverf, sizeof(nn->writeverf)); } while (need_seqretry(&nn->writeverf_lock, seq)); done_seqretry(&nn->writeverf_lock, seq); } @@ -427,16 +427,23 @@ static int nfsd_startup_net(struct net *net, const struct cred *cred) ret = nfsd_file_cache_start_net(net); if (ret) goto out_lockd; - ret = nfs4_state_start_net(net); + + ret = nfsd_reply_cache_init(nn); if (ret) goto out_filecache; + ret = nfs4_state_start_net(net); + if (ret) + goto out_reply_cache; + #ifdef CONFIG_NFSD_V4_2_INTER_SSC nfsd4_ssc_init_umount_work(nn); #endif nn->nfsd_net_up = true; return 0; +out_reply_cache: + nfsd_reply_cache_shutdown(nn); out_filecache: nfsd_file_cache_shutdown_net(net); out_lockd: @@ -453,8 +460,9 @@ static void nfsd_shutdown_net(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); - nfsd_file_cache_shutdown_net(net); nfs4_state_shutdown_net(net); + nfsd_reply_cache_shutdown(nn); + nfsd_file_cache_shutdown_net(net); if (nn->lockd_up) { lockd_down(net); nn->lockd_up = false; @@ -1022,7 +1030,6 @@ out: /** * nfsd_dispatch - Process an NFS or NFSACL Request * @rqstp: incoming request - * @statp: pointer to location of accept_stat field in RPC Reply buffer * * This RPC dispatcher integrates the NFS server's duplicate reply cache. * @@ -1030,9 +1037,10 @@ out: * %0: Processing complete; do not send a Reply * %1: Processing complete; send Reply in rqstp->rq_res */ -int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) +int nfsd_dispatch(struct svc_rqst *rqstp) { const struct svc_procedure *proc = rqstp->rq_procinfo; + __be32 *statp = rqstp->rq_accept_statp; /* * Give the xdr decoder a chance to change this if it wants @@ -1040,7 +1048,6 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) */ rqstp->rq_cachetype = proc->pc_cachetype; - svcxdr_init_decode(rqstp); if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream)) goto out_decode_err; @@ -1053,12 +1060,6 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) goto out_dropit; } - /* - * Need to grab the location to store the status, as - * NFSv4 does some encoding while processing - */ - svcxdr_init_encode(rqstp); - *statp = proc->pc_func(rqstp); if (test_bit(RQ_DROPME, &rqstp->rq_flags)) goto out_update_drop; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index e94634d30591..d49d3060ed4f 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -705,8 +705,6 @@ extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn); void put_nfs4_file(struct nfs4_file *fi); -extern struct nfsd4_copy * -find_async_copy(struct nfs4_client *clp, stateid_t *staetid); extern void nfs4_put_cpntf_state(struct nfsd_net *nn, struct nfs4_cpntf_state *cps); extern __be32 manage_cpntf_state(struct nfsd_net *nn, stateid_t *st, diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index c852ae8eaf37..4183819ea082 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -981,43 +981,6 @@ TRACE_EVENT(nfsd_file_acquire, ) ); -TRACE_EVENT(nfsd_file_create, - TP_PROTO( - const struct svc_rqst *rqstp, - unsigned int may_flags, - const struct nfsd_file *nf - ), - - TP_ARGS(rqstp, may_flags, nf), - - TP_STRUCT__entry( - __field(const void *, nf_inode) - __field(const void *, nf_file) - __field(unsigned long, may_flags) - __field(unsigned long, nf_flags) - __field(unsigned long, nf_may) - __field(unsigned int, nf_ref) - __field(u32, xid) - ), - - TP_fast_assign( - __entry->nf_inode = nf->nf_inode; - __entry->nf_file = nf->nf_file; - __entry->may_flags = may_flags; - __entry->nf_flags = nf->nf_flags; - __entry->nf_may = nf->nf_may; - __entry->nf_ref = refcount_read(&nf->nf_ref); - __entry->xid = be32_to_cpu(rqstp->rq_xid); - ), - - TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p", - __entry->xid, __entry->nf_inode, - show_nfsd_may_flags(__entry->may_flags), - __entry->nf_ref, show_nf_flags(__entry->nf_flags), - show_nfsd_may_flags(__entry->nf_may), __entry->nf_file - ) -); - TRACE_EVENT(nfsd_file_insert_err, TP_PROTO( const struct svc_rqst *rqstp, @@ -1079,8 +1042,8 @@ TRACE_EVENT(nfsd_file_cons_err, ) ); -TRACE_EVENT(nfsd_file_open, - TP_PROTO(struct nfsd_file *nf, __be32 status), +DECLARE_EVENT_CLASS(nfsd_file_open_class, + TP_PROTO(const struct nfsd_file *nf, __be32 status), TP_ARGS(nf, status), TP_STRUCT__entry( __field(void *, nf_inode) /* cannot be dereferenced */ @@ -1104,6 +1067,17 @@ TRACE_EVENT(nfsd_file_open, __entry->nf_file) ) +#define DEFINE_NFSD_FILE_OPEN_EVENT(name) \ +DEFINE_EVENT(nfsd_file_open_class, name, \ + TP_PROTO( \ + const struct nfsd_file *nf, \ + __be32 status \ + ), \ + TP_ARGS(nf, status)) + +DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_open); +DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_opened); + TRACE_EVENT(nfsd_file_is_cached, TP_PROTO( const struct inode *inode, @@ -1228,37 +1202,6 @@ TRACE_EVENT(nfsd_file_close, ) ); -TRACE_EVENT(nfsd_file_fsync, - TP_PROTO( - const struct nfsd_file *nf, - int ret - ), - TP_ARGS(nf, ret), - TP_STRUCT__entry( - __field(void *, nf_inode) - __field(int, nf_ref) - __field(int, ret) - __field(unsigned long, nf_flags) - __field(unsigned char, nf_may) - __field(struct file *, nf_file) - ), - TP_fast_assign( - __entry->nf_inode = nf->nf_inode; - __entry->nf_ref = refcount_read(&nf->nf_ref); - __entry->ret = ret; - __entry->nf_flags = nf->nf_flags; - __entry->nf_may = nf->nf_may; - __entry->nf_file = nf->nf_file; - ), - TP_printk("inode=%p ref=%d flags=%s may=%s nf_file=%p ret=%d", - __entry->nf_inode, - __entry->nf_ref, - show_nf_flags(__entry->nf_flags), - show_nfsd_may_flags(__entry->nf_may), - __entry->nf_file, __entry->ret - ) -); - #include "cache.h" TRACE_DEFINE_ENUM(RC_DROPIT); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 4c3a0d84043c..e7462b5e5f1e 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -126,9 +126,13 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, struct dentry *dentry = *dpp; struct path path = {.mnt = mntget(exp->ex_path.mnt), .dentry = dget(dentry)}; + unsigned int follow_flags = 0; int err = 0; - err = follow_down(&path); + if (exp->ex_flags & NFSEXP_CROSSMOUNT) + follow_flags = LOOKUP_AUTOMOUNT; + + err = follow_down(&path, follow_flags); if (err < 0) goto out; if (path.mnt == exp->ex_path.mnt && path.dentry == dentry && @@ -223,7 +227,7 @@ int nfsd_mountpoint(struct dentry *dentry, struct svc_export *exp) return 1; if (nfsd4_is_junction(dentry)) return 1; - if (d_mountpoint(dentry)) + if (d_managed(dentry)) /* * Might only be a mountpoint in a different namespace, * but we need to check. @@ -426,7 +430,7 @@ static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap) if (iap->ia_size < 0) return -EFBIG; - host_err = notify_change(&init_user_ns, dentry, &size_attr, NULL); + host_err = notify_change(&nop_mnt_idmap, dentry, &size_attr, NULL); if (host_err) return host_err; iap->ia_valid &= ~ATTR_SIZE; @@ -444,7 +448,7 @@ static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap) return 0; iap->ia_valid |= ATTR_CTIME; - return notify_change(&init_user_ns, dentry, iap, NULL); + return notify_change(&nop_mnt_idmap, dentry, iap, NULL); } /** @@ -542,12 +546,12 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, attr->na_labelerr = security_inode_setsecctx(dentry, attr->na_seclabel->data, attr->na_seclabel->len); if (IS_ENABLED(CONFIG_FS_POSIX_ACL) && attr->na_pacl) - attr->na_aclerr = set_posix_acl(&init_user_ns, + attr->na_aclerr = set_posix_acl(&nop_mnt_idmap, dentry, ACL_TYPE_ACCESS, attr->na_pacl); if (IS_ENABLED(CONFIG_FS_POSIX_ACL) && !attr->na_aclerr && attr->na_dpacl && S_ISDIR(inode->i_mode)) - attr->na_aclerr = set_posix_acl(&init_user_ns, + attr->na_aclerr = set_posix_acl(&nop_mnt_idmap, dentry, ACL_TYPE_DEFAULT, attr->na_dpacl); inode_unlock(inode); @@ -583,7 +587,7 @@ int nfsd4_is_junction(struct dentry *dentry) return 0; if (!(inode->i_mode & S_ISVTX)) return 0; - if (vfs_getxattr(&init_user_ns, dentry, NFSD_JUNCTION_XATTR_NAME, + if (vfs_getxattr(&nop_mnt_idmap, dentry, NFSD_JUNCTION_XATTR_NAME, NULL, 0) <= 0) return 0; return 1; @@ -1363,12 +1367,13 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, err = 0; switch (type) { case S_IFREG: - host_err = vfs_create(&init_user_ns, dirp, dchild, iap->ia_mode, true); + host_err = vfs_create(&nop_mnt_idmap, dirp, dchild, + iap->ia_mode, true); if (!host_err) nfsd_check_ignore_resizing(iap); break; case S_IFDIR: - host_err = vfs_mkdir(&init_user_ns, dirp, dchild, iap->ia_mode); + host_err = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode); if (!host_err && unlikely(d_unhashed(dchild))) { struct dentry *d; d = lookup_one_len(dchild->d_name.name, @@ -1396,7 +1401,7 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp, case S_IFBLK: case S_IFIFO: case S_IFSOCK: - host_err = vfs_mknod(&init_user_ns, dirp, dchild, + host_err = vfs_mknod(&nop_mnt_idmap, dirp, dchild, iap->ia_mode, rdev); break; default: @@ -1557,7 +1562,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out_drop_write; } fh_fill_pre_attrs(fhp); - host_err = vfs_symlink(&init_user_ns, d_inode(dentry), dnew, path); + host_err = vfs_symlink(&nop_mnt_idmap, d_inode(dentry), dnew, path); err = nfserrno(host_err); cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp); if (!err) @@ -1625,7 +1630,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, if (d_really_is_negative(dold)) goto out_dput; fh_fill_pre_attrs(ffhp); - host_err = vfs_link(dold, &init_user_ns, dirp, dnew, NULL); + host_err = vfs_link(dold, &nop_mnt_idmap, dirp, dnew, NULL); fh_fill_post_attrs(ffhp); inode_unlock(dirp); if (!host_err) { @@ -1745,10 +1750,10 @@ retry: goto out_dput_old; } else { struct renamedata rd = { - .old_mnt_userns = &init_user_ns, + .old_mnt_idmap = &nop_mnt_idmap, .old_dir = fdir, .old_dentry = odentry, - .new_mnt_userns = &init_user_ns, + .new_mnt_idmap = &nop_mnt_idmap, .new_dir = tdir, .new_dentry = ndentry, }; @@ -1850,14 +1855,14 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, nfsd_close_cached_files(rdentry); for (retries = 1;;) { - host_err = vfs_unlink(&init_user_ns, dirp, rdentry, NULL); + host_err = vfs_unlink(&nop_mnt_idmap, dirp, rdentry, NULL); if (host_err != -EAGAIN || !retries--) break; if (!nfsd_wait_for_delegreturn(rqstp, rinode)) break; } } else { - host_err = vfs_rmdir(&init_user_ns, dirp, rdentry); + host_err = vfs_rmdir(&nop_mnt_idmap, dirp, rdentry); } fh_fill_post_attrs(fhp); @@ -2129,7 +2134,7 @@ nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name, inode_lock_shared(inode); - len = vfs_getxattr(&init_user_ns, dentry, name, NULL, 0); + len = vfs_getxattr(&nop_mnt_idmap, dentry, name, NULL, 0); /* * Zero-length attribute, just return. @@ -2156,7 +2161,7 @@ nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name, goto out; } - len = vfs_getxattr(&init_user_ns, dentry, name, buf, len); + len = vfs_getxattr(&nop_mnt_idmap, dentry, name, buf, len); if (len <= 0) { kvfree(buf); buf = NULL; @@ -2267,7 +2272,7 @@ nfsd_removexattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name) inode_lock(fhp->fh_dentry->d_inode); fh_fill_pre_attrs(fhp); - ret = __vfs_removexattr_locked(&init_user_ns, fhp->fh_dentry, + ret = __vfs_removexattr_locked(&nop_mnt_idmap, fhp->fh_dentry, name, NULL); fh_fill_post_attrs(fhp); @@ -2294,7 +2299,7 @@ nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name, inode_lock(fhp->fh_dentry->d_inode); fh_fill_pre_attrs(fhp); - ret = __vfs_setxattr_locked(&init_user_ns, fhp->fh_dentry, name, buf, + ret = __vfs_setxattr_locked(&nop_mnt_idmap, fhp->fh_dentry, name, buf, len, flags, NULL); fh_fill_post_attrs(fhp); inode_unlock(fhp->fh_dentry->d_inode); @@ -2378,14 +2383,14 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, return 0; /* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */ - err = inode_permission(&init_user_ns, inode, + err = inode_permission(&nop_mnt_idmap, inode, acc & (MAY_READ | MAY_WRITE | MAY_EXEC)); /* Allow read access to binaries even when mode 111 */ if (err == -EACCES && S_ISREG(inode->i_mode) && (acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE) || acc == (NFSD_MAY_READ | NFSD_MAY_READ_IF_EXEC))) - err = inode_permission(&init_user_ns, inode, MAY_EXEC); + err = inode_permission(&nop_mnt_idmap, inode, MAY_EXEC); return err? nfserrno(err) : 0; } diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index dbdfef7ae85b..43fb57a301d3 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -170,9 +170,14 @@ static inline void fh_drop_write(struct svc_fh *fh) static inline __be32 fh_getattr(const struct svc_fh *fh, struct kstat *stat) { + u32 request_mask = STATX_BASIC_STATS; struct path p = {.mnt = fh->fh_export->ex_path.mnt, .dentry = fh->fh_dentry}; - return nfserrno(vfs_getattr(&p, stat, STATX_BASIC_STATS, + + if (fh->fh_maxsize == NFS4_FHSIZE) + request_mask |= (STATX_BTIME | STATX_CHANGE_COOKIE); + + return nfserrno(vfs_getattr(&p, stat, request_mask, AT_STATX_SYNC_AS_STAT)); } diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 4fd2cf6d1d2d..510978e602da 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -571,7 +571,7 @@ struct nfsd4_copy { struct task_struct *copy_task; refcount_t refcount; - struct vfsmount *ss_mnt; + struct nfsd4_ssc_umount_item *ss_nsui; struct nfs_fh c_fh; nfs4_stateid stateid; }; |