diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-08-02 13:27:23 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-08-02 13:27:23 -0700 |
commit | 98e247464088a11ce2328a214fdb87d4c06f8db6 (patch) | |
tree | 1f876a9b06b189855adfbefd1da6b6753effd1bb /io_uring | |
parent | Merge tag 'for-5.20/io_uring-2022-07-29' of git://git.kernel.dk/linux-block (diff) | |
parent | mm: honor FGP_NOWAIT for page cache page allocation (diff) | |
download | linux-dev-98e247464088a11ce2328a214fdb87d4c06f8db6.tar.xz linux-dev-98e247464088a11ce2328a214fdb87d4c06f8db6.zip |
Merge tag 'for-5.20/io_uring-buffered-writes-2022-07-29' of git://git.kernel.dk/linux-block
Pull io_uring buffered writes support from Jens Axboe:
"This contains support for buffered writes, specifically for XFS. btrfs
is in progress, will be coming in the next release.
io_uring does support buffered writes on any file type, but since the
buffered write path just always -EAGAIN (or -EOPNOTSUPP) any attempt
to do so if IOCB_NOWAIT is set, any buffered write will effectively be
handled by io-wq offload. This isn't very efficient, and we even have
specific code in io-wq to serialize buffered writes to the same inode
to avoid further inefficiencies with thread offload.
This is particularly sad since most buffered writes don't block, they
simply copy data to a page and dirty it. With this pull request, we
can handle buffered writes a lot more effiently.
If balance_dirty_pages() needs to block, we back off on writes as
indicated.
This improves buffered write support by 2-3x.
Jan Kara helped with the mm bits for this, and Stefan handled the
fs/iomap/xfs/io_uring parts of it"
* tag 'for-5.20/io_uring-buffered-writes-2022-07-29' of git://git.kernel.dk/linux-block:
mm: honor FGP_NOWAIT for page cache page allocation
xfs: Add async buffered write support
xfs: Specify lockmode when calling xfs_ilock_for_iomap()
io_uring: Add tracepoint for short writes
io_uring: fix issue with io_write() not always undoing sb_start_write()
io_uring: Add support for async buffered writes
fs: Add async write file modification handling.
fs: Split off inode_needs_update_time and __file_update_time
fs: add __remove_file_privs() with flags parameter
fs: add a FMODE_BUF_WASYNC flags for f_mode
iomap: Return -EAGAIN from iomap_write_iter()
iomap: Add async buffered write support
iomap: Add flags parameter to iomap_page_create()
mm: Add balance_dirty_pages_ratelimited_flags() function
mm: Move updates of dirty_exceeded into one place
mm: Move starting of background writeback into the main balancing loop
Diffstat (limited to 'io_uring')
-rw-r--r-- | io_uring/rw.c | 41 |
1 files changed, 35 insertions, 6 deletions
diff --git a/io_uring/rw.c b/io_uring/rw.c index c50a0d66d67a..2b784795103c 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -641,7 +641,7 @@ static inline int io_iter_do_read(struct io_rw *rw, struct iov_iter *iter) return -EINVAL; } -static bool need_read_all(struct io_kiocb *req) +static bool need_complete_io(struct io_kiocb *req) { return req->flags & REQ_F_ISREG || S_ISBLK(file_inode(req->file)->i_mode); @@ -775,7 +775,7 @@ int io_read(struct io_kiocb *req, unsigned int issue_flags) kfree(iovec); return IOU_ISSUE_SKIP_COMPLETE; } else if (ret == req->cqe.res || ret <= 0 || !force_nonblock || - (req->flags & REQ_F_NOWAIT) || !need_read_all(req)) { + (req->flags & REQ_F_NOWAIT) || !need_complete_io(req)) { /* read all, failed, already did sync or don't want to retry */ goto done; } @@ -870,9 +870,10 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags) if (unlikely(!io_file_supports_nowait(req))) goto copy_iov; - /* file path doesn't support NOWAIT for non-direct_IO */ - if (force_nonblock && !(kiocb->ki_flags & IOCB_DIRECT) && - (req->flags & REQ_F_ISREG)) + /* File path supports NOWAIT for non-direct_IO only for block devices. */ + if (!(kiocb->ki_flags & IOCB_DIRECT) && + !(kiocb->ki_filp->f_mode & FMODE_BUF_WASYNC) && + (req->flags & REQ_F_ISREG)) goto copy_iov; kiocb->ki_flags |= IOCB_NOWAIT; @@ -928,13 +929,41 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags) /* IOPOLL retry should happen for io-wq threads */ if (ret2 == -EAGAIN && (req->ctx->flags & IORING_SETUP_IOPOLL)) goto copy_iov; + + if (ret2 != req->cqe.res && ret2 >= 0 && need_complete_io(req)) { + struct io_async_rw *rw; + + trace_io_uring_short_write(req->ctx, kiocb->ki_pos - ret2, + req->cqe.res, ret2); + + /* This is a partial write. The file pos has already been + * updated, setup the async struct to complete the request + * in the worker. Also update bytes_done to account for + * the bytes already written. + */ + iov_iter_save_state(&s->iter, &s->iter_state); + ret = io_setup_async_rw(req, iovec, s, true); + + rw = req->async_data; + if (rw) + rw->bytes_done += ret2; + + if (kiocb->ki_flags & IOCB_WRITE) + kiocb_end_write(req); + return ret ? ret : -EAGAIN; + } done: ret = kiocb_done(req, ret2, issue_flags); } else { copy_iov: iov_iter_restore(&s->iter, &s->iter_state); ret = io_setup_async_rw(req, iovec, s, false); - return ret ?: -EAGAIN; + if (!ret) { + if (kiocb->ki_flags & IOCB_WRITE) + kiocb_end_write(req); + return -EAGAIN; + } + return ret; } /* it's reportedly faster than delegating the null check to kfree() */ if (iovec) |