aboutsummaryrefslogtreecommitdiffstats
path: root/io_uring
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-08-02 13:27:23 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-08-02 13:27:23 -0700
commit98e247464088a11ce2328a214fdb87d4c06f8db6 (patch)
tree1f876a9b06b189855adfbefd1da6b6753effd1bb /io_uring
parentMerge tag 'for-5.20/io_uring-2022-07-29' of git://git.kernel.dk/linux-block (diff)
parentmm: honor FGP_NOWAIT for page cache page allocation (diff)
downloadlinux-dev-98e247464088a11ce2328a214fdb87d4c06f8db6.tar.xz
linux-dev-98e247464088a11ce2328a214fdb87d4c06f8db6.zip
Merge tag 'for-5.20/io_uring-buffered-writes-2022-07-29' of git://git.kernel.dk/linux-block
Pull io_uring buffered writes support from Jens Axboe: "This contains support for buffered writes, specifically for XFS. btrfs is in progress, will be coming in the next release. io_uring does support buffered writes on any file type, but since the buffered write path just always -EAGAIN (or -EOPNOTSUPP) any attempt to do so if IOCB_NOWAIT is set, any buffered write will effectively be handled by io-wq offload. This isn't very efficient, and we even have specific code in io-wq to serialize buffered writes to the same inode to avoid further inefficiencies with thread offload. This is particularly sad since most buffered writes don't block, they simply copy data to a page and dirty it. With this pull request, we can handle buffered writes a lot more effiently. If balance_dirty_pages() needs to block, we back off on writes as indicated. This improves buffered write support by 2-3x. Jan Kara helped with the mm bits for this, and Stefan handled the fs/iomap/xfs/io_uring parts of it" * tag 'for-5.20/io_uring-buffered-writes-2022-07-29' of git://git.kernel.dk/linux-block: mm: honor FGP_NOWAIT for page cache page allocation xfs: Add async buffered write support xfs: Specify lockmode when calling xfs_ilock_for_iomap() io_uring: Add tracepoint for short writes io_uring: fix issue with io_write() not always undoing sb_start_write() io_uring: Add support for async buffered writes fs: Add async write file modification handling. fs: Split off inode_needs_update_time and __file_update_time fs: add __remove_file_privs() with flags parameter fs: add a FMODE_BUF_WASYNC flags for f_mode iomap: Return -EAGAIN from iomap_write_iter() iomap: Add async buffered write support iomap: Add flags parameter to iomap_page_create() mm: Add balance_dirty_pages_ratelimited_flags() function mm: Move updates of dirty_exceeded into one place mm: Move starting of background writeback into the main balancing loop
Diffstat (limited to 'io_uring')
-rw-r--r--io_uring/rw.c41
1 files changed, 35 insertions, 6 deletions
diff --git a/io_uring/rw.c b/io_uring/rw.c
index c50a0d66d67a..2b784795103c 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -641,7 +641,7 @@ static inline int io_iter_do_read(struct io_rw *rw, struct iov_iter *iter)
return -EINVAL;
}
-static bool need_read_all(struct io_kiocb *req)
+static bool need_complete_io(struct io_kiocb *req)
{
return req->flags & REQ_F_ISREG ||
S_ISBLK(file_inode(req->file)->i_mode);
@@ -775,7 +775,7 @@ int io_read(struct io_kiocb *req, unsigned int issue_flags)
kfree(iovec);
return IOU_ISSUE_SKIP_COMPLETE;
} else if (ret == req->cqe.res || ret <= 0 || !force_nonblock ||
- (req->flags & REQ_F_NOWAIT) || !need_read_all(req)) {
+ (req->flags & REQ_F_NOWAIT) || !need_complete_io(req)) {
/* read all, failed, already did sync or don't want to retry */
goto done;
}
@@ -870,9 +870,10 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
if (unlikely(!io_file_supports_nowait(req)))
goto copy_iov;
- /* file path doesn't support NOWAIT for non-direct_IO */
- if (force_nonblock && !(kiocb->ki_flags & IOCB_DIRECT) &&
- (req->flags & REQ_F_ISREG))
+ /* File path supports NOWAIT for non-direct_IO only for block devices. */
+ if (!(kiocb->ki_flags & IOCB_DIRECT) &&
+ !(kiocb->ki_filp->f_mode & FMODE_BUF_WASYNC) &&
+ (req->flags & REQ_F_ISREG))
goto copy_iov;
kiocb->ki_flags |= IOCB_NOWAIT;
@@ -928,13 +929,41 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
/* IOPOLL retry should happen for io-wq threads */
if (ret2 == -EAGAIN && (req->ctx->flags & IORING_SETUP_IOPOLL))
goto copy_iov;
+
+ if (ret2 != req->cqe.res && ret2 >= 0 && need_complete_io(req)) {
+ struct io_async_rw *rw;
+
+ trace_io_uring_short_write(req->ctx, kiocb->ki_pos - ret2,
+ req->cqe.res, ret2);
+
+ /* This is a partial write. The file pos has already been
+ * updated, setup the async struct to complete the request
+ * in the worker. Also update bytes_done to account for
+ * the bytes already written.
+ */
+ iov_iter_save_state(&s->iter, &s->iter_state);
+ ret = io_setup_async_rw(req, iovec, s, true);
+
+ rw = req->async_data;
+ if (rw)
+ rw->bytes_done += ret2;
+
+ if (kiocb->ki_flags & IOCB_WRITE)
+ kiocb_end_write(req);
+ return ret ? ret : -EAGAIN;
+ }
done:
ret = kiocb_done(req, ret2, issue_flags);
} else {
copy_iov:
iov_iter_restore(&s->iter, &s->iter_state);
ret = io_setup_async_rw(req, iovec, s, false);
- return ret ?: -EAGAIN;
+ if (!ret) {
+ if (kiocb->ki_flags & IOCB_WRITE)
+ kiocb_end_write(req);
+ return -EAGAIN;
+ }
+ return ret;
}
/* it's reportedly faster than delegating the null check to kfree() */
if (iovec)