diff options
Diffstat (limited to 'fs/nfs/direct.c')
-rw-r--r-- | fs/nfs/direct.c | 331 |
1 files changed, 132 insertions, 199 deletions
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index b768a0b42e82..1707f46b1335 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -59,44 +59,13 @@ #include "internal.h" #include "iostat.h" #include "pnfs.h" +#include "fscache.h" +#include "nfstrace.h" #define NFSDBG_FACILITY NFSDBG_VFS static struct kmem_cache *nfs_direct_cachep; -struct nfs_direct_req { - struct kref kref; /* release manager */ - - /* I/O parameters */ - struct nfs_open_context *ctx; /* file open context info */ - struct nfs_lock_context *l_ctx; /* Lock context info */ - struct kiocb * iocb; /* controlling i/o request */ - struct inode * inode; /* target file of i/o */ - - /* completion state */ - atomic_t io_count; /* i/os we're waiting for */ - spinlock_t lock; /* protect completion state */ - - loff_t io_start; /* Start offset for I/O */ - ssize_t count, /* bytes actually processed */ - max_count, /* max expected count */ - bytes_left, /* bytes left to be sent */ - error; /* any reported error */ - struct completion completion; /* wait for i/o completion */ - - /* commit state */ - struct nfs_mds_commit_info mds_cinfo; /* Storage for cinfo */ - struct pnfs_ds_commit_info ds_cinfo; /* Storage for cinfo */ - struct work_struct work; - int flags; - /* for write */ -#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */ -#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */ - /* for read */ -#define NFS_ODIRECT_SHOULD_DIRTY (3) /* dirty user-space page after read */ - struct nfs_writeverf verf; /* unstable write verifier */ -}; - static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops; static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops; static void nfs_direct_write_complete(struct nfs_direct_req *dreq); @@ -151,129 +120,26 @@ nfs_direct_count_bytes(struct nfs_direct_req *dreq, dreq->count = dreq_len; } -/* - * nfs_direct_select_verf - select the right verifier - * @dreq - direct request possibly spanning multiple servers - * @ds_clp - nfs_client of data server or NULL if MDS / non-pnfs - * @commit_idx - commit bucket index for the DS - * - * returns the correct verifier to use given the role of the server - */ -static struct nfs_writeverf * -nfs_direct_select_verf(struct nfs_direct_req *dreq, - struct nfs_client *ds_clp, - int commit_idx) -{ - struct nfs_writeverf *verfp = &dreq->verf; - -#ifdef CONFIG_NFS_V4_1 - /* - * pNFS is in use, use the DS verf except commit_through_mds is set - * for layout segment where nbuckets is zero. - */ - if (ds_clp && dreq->ds_cinfo.nbuckets > 0) { - if (commit_idx >= 0 && commit_idx < dreq->ds_cinfo.nbuckets) - verfp = &dreq->ds_cinfo.buckets[commit_idx].direct_verf; - else - WARN_ON_ONCE(1); - } -#endif - return verfp; -} - - -/* - * nfs_direct_set_hdr_verf - set the write/commit verifier - * @dreq - direct request possibly spanning multiple servers - * @hdr - pageio header to validate against previously seen verfs - * - * Set the server's (MDS or DS) "seen" verifier - */ -static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq, - struct nfs_pgio_header *hdr) -{ - struct nfs_writeverf *verfp; - - verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, hdr->ds_commit_idx); - WARN_ON_ONCE(verfp->committed >= 0); - memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); - WARN_ON_ONCE(verfp->committed < 0); -} - -static int nfs_direct_cmp_verf(const struct nfs_writeverf *v1, - const struct nfs_writeverf *v2) -{ - return nfs_write_verifier_cmp(&v1->verifier, &v2->verifier); -} - -/* - * nfs_direct_cmp_hdr_verf - compare verifier for pgio header - * @dreq - direct request possibly spanning multiple servers - * @hdr - pageio header to validate against previously seen verf - * - * set the server's "seen" verf if not initialized. - * returns result of comparison between @hdr->verf and the "seen" - * verf of the server used by @hdr (DS or MDS) - */ -static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq, - struct nfs_pgio_header *hdr) -{ - struct nfs_writeverf *verfp; - - verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, hdr->ds_commit_idx); - if (verfp->committed < 0) { - nfs_direct_set_hdr_verf(dreq, hdr); - return 0; - } - return nfs_direct_cmp_verf(verfp, &hdr->verf); -} - -/* - * nfs_direct_cmp_commit_data_verf - compare verifier for commit data - * @dreq - direct request possibly spanning multiple servers - * @data - commit data to validate against previously seen verf - * - * returns result of comparison between @data->verf and the verf of - * the server used by @data (DS or MDS) - */ -static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, - struct nfs_commit_data *data) -{ - struct nfs_writeverf *verfp; - - verfp = nfs_direct_select_verf(dreq, data->ds_clp, - data->ds_commit_index); - - /* verifier not set so always fail */ - if (verfp->committed < 0 || data->res.verf->committed <= NFS_UNSTABLE) - return 1; - - return nfs_direct_cmp_verf(verfp, data->res.verf); -} - /** - * nfs_direct_IO - NFS address space operation for direct I/O + * nfs_swap_rw - NFS address space operation for swap I/O * @iocb: target I/O control block * @iter: I/O buffer * - * The presence of this routine in the address space ops vector means - * the NFS client supports direct I/O. However, for most direct IO, we - * shunt off direct read and write requests before the VFS gets them, - * so this method is only ever called for swap. + * Perform IO to the swap-file. This is much like direct IO. */ -ssize_t nfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) +int nfs_swap_rw(struct kiocb *iocb, struct iov_iter *iter) { - struct inode *inode = iocb->ki_filp->f_mapping->host; - - /* we only support swap file calling nfs_direct_IO */ - if (!IS_SWAPFILE(inode)) - return 0; + ssize_t ret; VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE); if (iov_iter_rw(iter) == READ) - return nfs_file_direct_read(iocb, iter); - return nfs_file_direct_write(iocb, iter); + ret = nfs_file_direct_read(iocb, iter, true); + else + ret = nfs_file_direct_write(iocb, iter, true); + if (ret < 0) + return ret; + return 0; } static void nfs_direct_release_pages(struct page **pages, unsigned int npages) @@ -305,7 +171,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) kref_get(&dreq->kref); init_completion(&dreq->completion); INIT_LIST_HEAD(&dreq->mds_cinfo.list); - dreq->verf.committed = NFS_INVALID_STABLE_HOW; /* not set yet */ + pnfs_init_ds_commit_info(&dreq->ds_cinfo); INIT_WORK(&dreq->work, nfs_direct_write_schedule_work); spin_lock_init(&dreq->lock); @@ -316,7 +182,7 @@ static void nfs_direct_req_free(struct kref *kref) { struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref); - nfs_free_pnfs_ds_cinfo(&dreq->ds_cinfo); + pnfs_release_ds_info(&dreq->ds_cinfo, dreq->inode); if (dreq->l_ctx != NULL) nfs_put_lock_context(dreq->l_ctx); if (dreq->ctx != NULL) @@ -375,7 +241,7 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) res = (long) dreq->count; WARN_ON_ONCE(dreq->count < 0); } - dreq->iocb->ki_complete(dreq->iocb, res, 0); + dreq->iocb->ki_complete(dreq->iocb, res); } complete(&dreq->completion); @@ -466,13 +332,12 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, size_t pgbase; unsigned npages, i; - result = iov_iter_get_pages_alloc(iter, &pagevec, + result = iov_iter_get_pages_alloc2(iter, &pagevec, rsize, &pgbase); if (result < 0) break; bytes = result; - iov_iter_advance(iter, bytes); npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE; for (i = 0; i < npages; i++) { struct nfs_page *req; @@ -524,6 +389,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, * nfs_file_direct_read - file direct read operation for NFS files * @iocb: target I/O control block * @iter: vector of user buffers into which to read data + * @swap: flag indicating this is swap IO, not O_DIRECT IO * * We use this function for direct reads instead of calling * generic_file_aio_read() in order to avoid gfar's check to see if @@ -539,14 +405,15 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, * client must read the updated atime from the server back into its * cache. */ -ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) +ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, + bool swap) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; struct nfs_direct_req *dreq; struct nfs_lock_context *l_ctx; - ssize_t result = -EINVAL, requested; + ssize_t result, requested; size_t count = iov_iter_count(iter); nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count); @@ -571,21 +438,24 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) l_ctx = nfs_get_lock_context(dreq->ctx); if (IS_ERR(l_ctx)) { result = PTR_ERR(l_ctx); + nfs_direct_req_release(dreq); goto out_release; } dreq->l_ctx = l_ctx; if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; - if (iter_is_iovec(iter)) + if (user_backed_iter(iter)) dreq->flags = NFS_ODIRECT_SHOULD_DIRTY; - nfs_start_io_direct(inode); + if (!swap) + nfs_start_io_direct(inode); NFS_I(inode)->read_io += count; requested = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos); - nfs_end_io_direct(inode); + if (!swap) + nfs_end_io_direct(inode); if (requested > 0) { result = nfs_direct_wait(dreq); @@ -605,15 +475,30 @@ out: } static void +nfs_direct_join_group(struct list_head *list, struct inode *inode) +{ + struct nfs_page *req, *next; + + list_for_each_entry(req, list, wb_list) { + if (req->wb_head != req || req->wb_this_page == req) + continue; + for (next = req->wb_this_page; + next != req->wb_head; + next = next->wb_this_page) { + nfs_list_remove_request(next); + nfs_release_request(next); + } + nfs_join_page_group(req, inode); + } +} + +static void nfs_direct_write_scan_commit_list(struct inode *inode, struct list_head *list, struct nfs_commit_info *cinfo) { mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); -#ifdef CONFIG_NFS_V4_1 - if (cinfo->ds != NULL && cinfo->ds->nwritten != 0) - NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo); -#endif + pnfs_recover_commit_reqs(list, cinfo); nfs_scan_commit_list(&cinfo->mds->list, list, cinfo, 0); mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); } @@ -629,11 +514,12 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) nfs_init_cinfo_from_dreq(&cinfo, dreq); nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo); + nfs_direct_join_group(&reqs, dreq->inode); + dreq->count = 0; dreq->max_count = 0; list_for_each_entry(req, &reqs, wb_list) dreq->max_count += req->wb_bytes; - dreq->verf.committed = NFS_INVALID_STABLE_HOW; nfs_clear_pnfs_ds_commit_verifiers(&dreq->ds_cinfo); get_dreq(dreq); @@ -670,32 +556,43 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) static void nfs_direct_commit_complete(struct nfs_commit_data *data) { + const struct nfs_writeverf *verf = data->res.verf; struct nfs_direct_req *dreq = data->dreq; struct nfs_commit_info cinfo; struct nfs_page *req; int status = data->task.tk_status; + trace_nfs_direct_commit_complete(dreq); + + if (status < 0) { + /* Errors in commit are fatal */ + dreq->error = status; + dreq->max_count = 0; + dreq->count = 0; + dreq->flags = NFS_ODIRECT_DONE; + } else { + status = dreq->error; + } + nfs_init_cinfo_from_dreq(&cinfo, dreq); - if (status < 0 || nfs_direct_cmp_commit_data_verf(dreq, data)) - dreq->flags = NFS_ODIRECT_RESCHED_WRITES; while (!list_empty(&data->pages)) { req = nfs_list_entry(data->pages.next); nfs_list_remove_request(req); - if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) { + if (status >= 0 && !nfs_write_match_verf(verf, req)) { + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; /* * Despite the reboot, the write was successful, * so reset wb_nio. */ req->wb_nio = 0; - /* Note the rewrite will go through mds */ nfs_mark_request_commit(req, NULL, &cinfo, 0); - } else + } else /* Error or match */ nfs_release_request(req); nfs_unlock_and_release_request(req); } - if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) + if (nfs_commit_end(cinfo.mds)) nfs_direct_write_complete(dreq); } @@ -704,8 +601,11 @@ static void nfs_direct_resched_write(struct nfs_commit_info *cinfo, { struct nfs_direct_req *dreq = cinfo->dreq; + trace_nfs_direct_resched_write(dreq); + spin_lock(&dreq->lock); - dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + if (dreq->flags != NFS_ODIRECT_DONE) + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; spin_unlock(&dreq->lock); nfs_mark_request_commit(req, NULL, cinfo, 0); } @@ -728,6 +628,23 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) nfs_direct_write_reschedule(dreq); } +static void nfs_direct_write_clear_reqs(struct nfs_direct_req *dreq) +{ + struct nfs_commit_info cinfo; + struct nfs_page *req; + LIST_HEAD(reqs); + + nfs_init_cinfo_from_dreq(&cinfo, dreq); + nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo); + + while (!list_empty(&reqs)) { + req = nfs_list_entry(reqs.next); + nfs_list_remove_request(req); + nfs_release_request(req); + nfs_unlock_and_release_request(req); + } +} + static void nfs_direct_write_schedule_work(struct work_struct *work) { struct nfs_direct_req *dreq = container_of(work, struct nfs_direct_req, work); @@ -742,6 +659,7 @@ static void nfs_direct_write_schedule_work(struct work_struct *work) nfs_direct_write_reschedule(dreq); break; default: + nfs_direct_write_clear_reqs(dreq); nfs_zap_mapping(dreq->inode, dreq->inode->i_mapping); nfs_direct_complete(dreq); } @@ -749,6 +667,7 @@ static void nfs_direct_write_schedule_work(struct work_struct *work) static void nfs_direct_write_complete(struct nfs_direct_req *dreq) { + trace_nfs_direct_write_complete(dreq); queue_work(nfsiod_workqueue, &dreq->work); /* Calls nfs_direct_write_schedule_work */ } @@ -756,8 +675,10 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) { struct nfs_direct_req *dreq = hdr->dreq; struct nfs_commit_info cinfo; - bool request_commit = false; struct nfs_page *req = nfs_list_entry(hdr->pages.next); + int flags = NFS_ODIRECT_DONE; + + trace_nfs_direct_write_completion(dreq); nfs_init_cinfo_from_dreq(&cinfo, dreq); @@ -768,21 +689,10 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) } nfs_direct_count_bytes(dreq, hdr); - if (hdr->good_bytes != 0) { - if (nfs_write_need_commit(hdr)) { - if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) - request_commit = true; - else if (dreq->flags == 0) { - nfs_direct_set_hdr_verf(dreq, hdr); - request_commit = true; - dreq->flags = NFS_ODIRECT_DO_COMMIT; - } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) { - request_commit = true; - if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr)) - dreq->flags = - NFS_ODIRECT_RESCHED_WRITES; - } - } + if (test_bit(NFS_IOHDR_UNSTABLE_WRITES, &hdr->flags)) { + if (!dreq->flags) + dreq->flags = NFS_ODIRECT_DO_COMMIT; + flags = dreq->flags; } spin_unlock(&dreq->lock); @@ -790,10 +700,15 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) req = nfs_list_entry(hdr->pages.next); nfs_list_remove_request(req); - if (request_commit) { + if (flags == NFS_ODIRECT_DO_COMMIT) { kref_get(&req->wb_kref); + memcpy(&req->wb_verf, &hdr->verf.verifier, + sizeof(req->wb_verf)); nfs_mark_request_commit(req, hdr->lseg, &cinfo, hdr->ds_commit_idx); + } else if (flags == NFS_ODIRECT_RESCHED_WRITES) { + kref_get(&req->wb_kref); + nfs_mark_request_commit(req, NULL, &cinfo, 0); } nfs_unlock_and_release_request(req); } @@ -819,6 +734,8 @@ static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr) { struct nfs_direct_req *dreq = hdr->dreq; + trace_nfs_direct_write_reschedule_io(dreq); + spin_lock(&dreq->lock); if (dreq->error == 0) { dreq->flags = NFS_ODIRECT_RESCHED_WRITES; @@ -851,7 +768,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { */ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, struct iov_iter *iter, - loff_t pos) + loff_t pos, int ioflags) { struct nfs_pageio_descriptor desc; struct inode *inode = dreq->inode; @@ -859,7 +776,9 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, size_t requested_bytes = 0; size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE); - nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false, + trace_nfs_direct_write_schedule_iovec(dreq); + + nfs_pageio_init_write(&desc, inode, ioflags, false, &nfs_direct_write_completion_ops); desc.pg_dreq = dreq; get_dreq(dreq); @@ -872,13 +791,12 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, size_t pgbase; unsigned npages, i; - result = iov_iter_get_pages_alloc(iter, &pagevec, + result = iov_iter_get_pages_alloc2(iter, &pagevec, wsize, &pgbase); if (result < 0) break; bytes = result; - iov_iter_advance(iter, bytes); npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE; for (i = 0; i < npages; i++) { struct nfs_page *req; @@ -937,6 +855,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, * nfs_file_direct_write - file direct write operation for NFS files * @iocb: target I/O control block * @iter: vector of user buffers from which to write data + * @swap: flag indicating this is swap IO, not O_DIRECT IO * * We use this function for direct writes instead of calling * generic_file_aio_write() in order to avoid taking the inode @@ -953,9 +872,10 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, * Note that O_APPEND is not supported for NFS direct writes, as there * is no atomic O_APPEND write facility in the NFS protocol. */ -ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) +ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, + bool swap) { - ssize_t result = -EINVAL, requested; + ssize_t result, requested; size_t count; struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -967,7 +887,11 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n", file, iov_iter_count(iter), (long long) iocb->ki_pos); - result = generic_write_checks(iocb, iter); + if (swap) + /* bypass generic checks */ + result = iov_iter_count(iter); + else + result = generic_write_checks(iocb, iter); if (result <= 0) return result; count = result; @@ -990,22 +914,30 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) l_ctx = nfs_get_lock_context(dreq->ctx); if (IS_ERR(l_ctx)) { result = PTR_ERR(l_ctx); + nfs_direct_req_release(dreq); goto out_release; } dreq->l_ctx = l_ctx; if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; + pnfs_init_ds_commit_info_ops(&dreq->ds_cinfo, inode); - nfs_start_io_direct(inode); + if (swap) { + requested = nfs_direct_write_schedule_iovec(dreq, iter, pos, + FLUSH_STABLE); + } else { + nfs_start_io_direct(inode); - requested = nfs_direct_write_schedule_iovec(dreq, iter, pos); + requested = nfs_direct_write_schedule_iovec(dreq, iter, pos, + FLUSH_COND_STABLE); - if (mapping->nrpages) { - invalidate_inode_pages2_range(mapping, - pos >> PAGE_SHIFT, end); - } + if (mapping->nrpages) { + invalidate_inode_pages2_range(mapping, + pos >> PAGE_SHIFT, end); + } - nfs_end_io_direct(inode); + nfs_end_io_direct(inode); + } if (requested > 0) { result = nfs_direct_wait(dreq); @@ -1019,6 +951,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) } else { result = requested; } + nfs_fscache_invalidate(inode, FSCACHE_INVAL_DIO_WRITE); out_release: nfs_direct_req_release(dreq); out: |