From 82b145c5a572f7fa7211dffe2097234dc91bcecc Mon Sep 17 00:00:00 2001
From: Chuck Lever
Date: Tue, 20 Jun 2006 12:57:03 -0400
Subject: NFS: alloc nfs_read/write_data as direct I/O is scheduled

Re-arrange the logic in the NFS direct I/O path so that nfs_read/write_data
structs are allocated just before they are scheduled, rather than
allocating them all at once before we start scheduling requests.

Signed-off-by: Chuck Lever
Signed-off-by: Trond Myklebust
---
 fs/nfs/direct.c | 210 ++++++++++++++++++--------------------------------------
 1 file changed, 65 insertions(+), 145 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index b1630d53fbb1..e25b7595b7ad 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -68,8 +68,6 @@ struct nfs_direct_req {
 	struct kref		kref;		/* release manager */
 
 	/* I/O parameters */
-	struct list_head	list,		/* nfs_read/write_data structs */
-				rewrite_list;	/* saved nfs_write_data structs */
 	struct nfs_open_context	*ctx;		/* file open context info */
 	struct kiocb *		iocb;		/* controlling i/o request */
 	struct inode *		inode;		/* target file of i/o */
@@ -82,6 +80,7 @@ struct nfs_direct_req {
 	struct completion	completion;	/* wait for i/o completion */
 
 	/* commit state */
+	struct list_head	rewrite_list;	/* saved nfs_write_data structs */
 	struct nfs_write_data *	commit_data;	/* special write_data for commits */
 	int			flags;
 #define NFS_ODIRECT_DO_COMMIT		(1)	/* an unstable reply was received */
@@ -116,6 +115,11 @@ static inline int nfs_direct_count_pages(unsigned long user_addr, size_t size)
 	return page_count;
 }
 
+static inline unsigned int nfs_max_pages(unsigned int size)
+{
+	return (size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+}
+
 /**
  * nfs_direct_IO - NFS address space operation for direct I/O
  * @rw: direction (read or write)
@@ -164,8 +168,8 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
 		return NULL;
 
 	kref_init(&dreq->kref);
+	kref_get(&dreq->kref);
 	init_completion(&dreq->completion);
-	INIT_LIST_HEAD(&dreq->list);
 	INIT_LIST_HEAD(&dreq->rewrite_list);
 	dreq->iocb = NULL;
 	dreq->ctx = NULL;
@@ -227,49 +231,6 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
 	kref_put(&dreq->kref, nfs_direct_req_release);
 }
 
-/*
- * Note we also set the number of requests we have in the dreq when we are
- * done. This prevents races with I/O completion so we will always wait
- * until all requests have been dispatched and completed.
- */
-static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize)
-{
-	struct list_head *list;
-	struct nfs_direct_req *dreq;
-	unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-
-	dreq = nfs_direct_req_alloc();
-	if (!dreq)
-		return NULL;
-
-	list = &dreq->list;
-	for(;;) {
-		struct nfs_read_data *data = nfs_readdata_alloc(rpages);
-
-		if (unlikely(!data)) {
-			while (!list_empty(list)) {
-				data = list_entry(list->next,
-						struct nfs_read_data, pages);
-				list_del(&data->pages);
-				nfs_readdata_free(data);
-			}
-			kref_put(&dreq->kref, nfs_direct_req_release);
-			return NULL;
-		}
-
-		INIT_LIST_HEAD(&data->pages);
-		list_add(&data->pages, list);
-
-		data->req = (struct nfs_page *) dreq;
-		get_dreq(dreq);
-		if (nbytes <= rsize)
-			break;
-		nbytes -= rsize;
-	}
-	kref_get(&dreq->kref);
-	return dreq;
-}
-
 /*
  * We must hold a reference to all the pages in this direct read request
  * until the RPCs complete. This could be long *after* we are woken up in
@@ -305,42 +266,53 @@ static const struct rpc_call_ops nfs_read_direct_ops = {
 };
 
 /*
- * For each nfs_read_data struct that was allocated on the list, dispatch
- * an NFS READ operation. If get_user_pages() fails, we stop sending reads.
- * Read length accounting is handled by nfs_direct_read_result().
- * Otherwise, if no requests have been sent, just return an error.
+ * For each rsize'd chunk of the user's buffer, dispatch an NFS READ
+ * operation. If nfs_readdata_alloc() or get_user_pages() fails,
+ * bail and stop sending more reads. Read length accounting is
+ * handled automatically by nfs_direct_read_result(). Otherwise, if
+ * no requests have been sent, just return an error.
  */
 static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos)
 {
 	struct nfs_open_context *ctx = dreq->ctx;
 	struct inode *inode = ctx->dentry->d_inode;
-	struct list_head *list = &dreq->list;
 	size_t rsize = NFS_SERVER(inode)->rsize;
+	unsigned int rpages = nfs_max_pages(rsize);
 	unsigned int pgbase;
 	int result;
 	ssize_t started = 0;
-	struct nfs_read_data *data;
+
+	get_dreq(dreq);
 
 	pgbase = user_addr & ~PAGE_MASK;
 	do {
+		struct nfs_read_data *data;
 		size_t bytes;
 
+		result = -ENOMEM;
+		data = nfs_readdata_alloc(rpages);
+		if (unlikely(!data))
+			break;
+
 		bytes = rsize;
 		if (count < rsize)
 			bytes = count;
 
-		BUG_ON(list_empty(list));
-		data = list_entry(list->next, struct nfs_read_data, pages);
-		list_del_init(&data->pages);
-
 		data->npages = nfs_direct_count_pages(user_addr, bytes);
 		down_read(&current->mm->mmap_sem);
 		result = get_user_pages(current, current->mm, user_addr,
					data->npages, 1, 0, data->pagevec, NULL);
 		up_read(&current->mm->mmap_sem);
-		if (unlikely(result < data->npages))
-			goto out_err;
+		if (unlikely(result < data->npages)) {
+			if (result > 0)
+				nfs_direct_release_pages(data->pagevec, result);
+			nfs_readdata_release(data);
+			break;
+		}
 
+		get_dreq(dreq);
+
+		data->req = (struct nfs_page *) dreq;
 		data->inode = inode;
 		data->cred = ctx->cred;
 		data->args.fh = NFS_FH(inode);
@@ -378,21 +350,9 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo
 
 		count -= bytes;
 	} while (count != 0);
-	BUG_ON(!list_empty(list));
-	return 0;
-
-out_err:
-	if (result > 0)
-		nfs_direct_release_pages(data->pagevec, result);
-
-	list_add(&data->pages, list);
-	while (!list_empty(list)) {
-		data = list_entry(list->next, struct nfs_read_data, pages);
-		list_del(&data->pages);
-		nfs_readdata_free(data);
-		if (put_dreq(dreq))
-			nfs_direct_complete(dreq);
-	}
+
+	if (put_dreq(dreq))
+		nfs_direct_complete(dreq);
 
 	if (started)
 		return 0;
@@ -401,13 +361,13 @@ out_err:
 
 static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos)
 {
-	ssize_t result;
+	ssize_t result = 0;
 	sigset_t oldset;
 	struct inode *inode = iocb->ki_filp->f_mapping->host;
 	struct rpc_clnt *clnt = NFS_CLIENT(inode);
 	struct nfs_direct_req *dreq;
 
-	dreq = nfs_direct_read_alloc(count, NFS_SERVER(inode)->rsize);
+	dreq = nfs_direct_req_alloc();
 	if (!dreq)
 		return -ENOMEM;
 
@@ -428,9 +388,8 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size
 
 static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
 {
-	list_splice_init(&dreq->rewrite_list, &dreq->list);
-	while (!list_empty(&dreq->list)) {
-		struct nfs_write_data *data = list_entry(dreq->list.next, struct nfs_write_data, pages);
+	while (!list_empty(&dreq->rewrite_list)) {
+		struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages);
 		list_del(&data->pages);
 		nfs_direct_release_pages(data->pagevec, data->npages);
 		nfs_writedata_release(data);
@@ -584,47 +543,6 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
 }
 #endif
 
-static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize)
-{
-	struct list_head *list;
-	struct nfs_direct_req *dreq;
-	unsigned int wpages = (wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-
-	dreq = nfs_direct_req_alloc();
-	if (!dreq)
-		return NULL;
-
-	list = &dreq->list;
-	for(;;) {
-		struct nfs_write_data *data = nfs_writedata_alloc(wpages);
-
-		if (unlikely(!data)) {
-			while (!list_empty(list)) {
-				data = list_entry(list->next,
-						struct nfs_write_data, pages);
-				list_del(&data->pages);
-				nfs_writedata_free(data);
-			}
-			kref_put(&dreq->kref, nfs_direct_req_release);
-			return NULL;
-		}
-
-		INIT_LIST_HEAD(&data->pages);
-		list_add(&data->pages, list);
-
-		data->req = (struct nfs_page *) dreq;
-		get_dreq(dreq);
-		if (nbytes <= wsize)
-			break;
-		nbytes -= wsize;
-	}
-
-	nfs_alloc_commit_data(dreq);
-
-	kref_get(&dreq->kref);
-	return dreq;
-}
-
 static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
 {
 	struct nfs_write_data *data = calldata;
@@ -677,43 +595,55 @@ static const struct rpc_call_ops nfs_write_direct_ops = {
 };
 
 /*
- * For each nfs_write_data struct that was allocated on the list, dispatch
- * an NFS WRITE operation. If get_user_pages() fails, we stop sending writes.
- * Write length accounting is handled by nfs_direct_write_result().
- * Otherwise, if no requests have been sent, just return an error.
+ * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
+ * operation. If nfs_writedata_alloc() or get_user_pages() fails,
+ * bail and stop sending more writes. Write length accounting is
+ * handled automatically by nfs_direct_write_result(). Otherwise, if
+ * no requests have been sent, just return an error.
  */
 static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync)
 {
 	struct nfs_open_context *ctx = dreq->ctx;
 	struct inode *inode = ctx->dentry->d_inode;
-	struct list_head *list = &dreq->list;
 	size_t wsize = NFS_SERVER(inode)->wsize;
+	unsigned int wpages = nfs_max_pages(wsize);
 	unsigned int pgbase;
 	int result;
 	ssize_t started = 0;
-	struct nfs_write_data *data;
+
+	get_dreq(dreq);
 
 	pgbase = user_addr & ~PAGE_MASK;
 	do {
+		struct nfs_write_data *data;
 		size_t bytes;
 
+		result = -ENOMEM;
+		data = nfs_writedata_alloc(wpages);
+		if (unlikely(!data))
+			break;
+
 		bytes = wsize;
 		if (count < wsize)
 			bytes = count;
 
-		BUG_ON(list_empty(list));
-		data = list_entry(list->next, struct nfs_write_data, pages);
-
 		data->npages = nfs_direct_count_pages(user_addr, bytes);
 		down_read(&current->mm->mmap_sem);
 		result = get_user_pages(current, current->mm, user_addr,
					data->npages, 0, 0, data->pagevec, NULL);
 		up_read(&current->mm->mmap_sem);
-		if (unlikely(result < data->npages))
-			goto out_err;
+		if (unlikely(result < data->npages)) {
+			if (result > 0)
+				nfs_direct_release_pages(data->pagevec, result);
+			nfs_writedata_release(data);
+			break;
+		}
+
+		get_dreq(dreq);
 
 		list_move_tail(&data->pages, &dreq->rewrite_list);
 
+		data->req = (struct nfs_page *) dreq;
 		data->inode = inode;
 		data->cred = ctx->cred;
 		data->args.fh = NFS_FH(inode);
@@ -752,21 +682,9 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l
 
 		count -= bytes;
 	} while (count != 0);
-	BUG_ON(!list_empty(list));
-	return 0;
-
-out_err:
-	if (result > 0)
-		nfs_direct_release_pages(data->pagevec, result);
-
-	list_add(&data->pages, list);
-	while (!list_empty(list)) {
-		data = list_entry(list->next, struct nfs_write_data, pages);
-		list_del(&data->pages);
-		nfs_writedata_free(data);
-		if (put_dreq(dreq))
-			nfs_direct_write_complete(dreq, inode);
-	}
+
+	if (put_dreq(dreq))
+		nfs_direct_write_complete(dreq, inode);
 
 	if (started)
 		return 0;
@@ -775,7 +693,7 @@ out_err:
 
 static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos)
 {
-	ssize_t result;
+	ssize_t result = 0;
 	sigset_t oldset;
 	struct inode *inode = iocb->ki_filp->f_mapping->host;
 	struct rpc_clnt *clnt = NFS_CLIENT(inode);
@@ -783,9 +701,11 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz
 	struct nfs_direct_req *dreq;
 	size_t wsize = NFS_SERVER(inode)->wsize;
 	int sync = 0;
-	dreq = nfs_direct_write_alloc(count, wsize);
+	dreq = nfs_direct_req_alloc();
 	if (!dreq)
 		return -ENOMEM;
+	nfs_alloc_commit_data(dreq);
+
 	if (dreq->commit_data == NULL || count < wsize)
 		sync = FLUSH_STABLE;
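Note (not part of the patch): the scheduling pattern the hunks above move to can be
sketched in a few lines of ordinary user-space C. The names below (io_context,
chunk_request, schedule_chunks, chunk_done) are hypothetical stand-ins for
nfs_direct_req, nfs_read/write_data and the schedule/completion paths; the sketch only
shows the shape of the logic, with completion happening synchronously for brevity.

/*
 * Illustrative sketch only, not kernel code: allocate one descriptor per
 * chunk right before dispatching it, take a reference per in-flight chunk,
 * and complete the whole request when the last reference is dropped.
 */
#include <stdio.h>
#include <stdlib.h>

struct io_context {
	int refs;          /* scheduler's reference + one per in-flight chunk */
	size_t dispatched; /* bytes handed off so far */
};

struct chunk_request {
	struct io_context *ctx;
	size_t bytes;
};

static void put_ref(struct io_context *ctx)
{
	if (--ctx->refs == 0)
		printf("I/O complete: %zu bytes dispatched\n", ctx->dispatched);
}

/* Stand-in for the asynchronous completion of one chunk. */
static void chunk_done(struct chunk_request *req)
{
	req->ctx->dispatched += req->bytes;
	put_ref(req->ctx);
	free(req);
}

static void schedule_chunks(struct io_context *ctx, size_t count, size_t chunk)
{
	ctx->refs = 1;                  /* the scheduler's own reference */
	do {
		size_t bytes = (count < chunk) ? count : chunk;
		/* Allocate just before dispatch instead of pre-allocating all. */
		struct chunk_request *req = malloc(sizeof(*req));
		if (!req)
			break;          /* stop scheduling; sent chunks still finish */
		req->ctx = ctx;
		req->bytes = bytes;
		ctx->refs++;            /* reference held by the in-flight chunk */
		chunk_done(req);        /* dispatch; completes synchronously here */
		count -= bytes;
	} while (count != 0);
	put_ref(ctx);                   /* drop the scheduler's reference */
}

int main(void)
{
	struct io_context ctx = { 0, 0 };
	schedule_chunks(&ctx, 10000, 4096);
	return 0;
}

Holding the scheduler's own reference for the whole loop (get_dreq() before the do/while
in the patch) is what keeps completion from firing while chunks are still being
dispatched; an allocation or get_user_pages() failure part-way through simply stops
further scheduling and lets the already-sent chunks finish the request.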