aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-10-18 22:29:36 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2019-10-18 22:29:36 -0400
commitd418d070057c45fd6f21567278f95452bfe690d1 (patch)
tree24a02ac68321cc119fbcba426e5914dc50c78dce /fs
parentMerge tag 'riscv/for-v5.4-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux (diff)
parentMerge branch 'nvme-5.4' of git://git.infradead.org/nvme into for-linus (diff)
downloadlinux-dev-d418d070057c45fd6f21567278f95452bfe690d1.tar.xz
linux-dev-d418d070057c45fd6f21567278f95452bfe690d1.zip
Merge tag 'for-linus-2019-10-18' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe: - NVMe pull request from Keith that addresses deadlocks, double resets, memory leaks, and other regressions. - Fixup elv_support_iosched() for bio-based devices (Damien) - Fixup for the ahci PCS quirk (Dan) - Socket O_NONBLOCK handling fix for io_uring (me) - Timeout sequence io_uring fixes (yangerkun) - MD warning fix for parameter default_layout (Song) - blkcg activation fixes (Tejun) - blk-rq-qos node deletion fix (Tejun) * tag 'for-linus-2019-10-18' of git://git.kernel.dk/linux-block: nvme-pci: Set the prp2 correctly when using more than 4k page io_uring: fix logic error in io_timeout io_uring: fix up O_NONBLOCK handling for sockets md/raid0: fix warning message for parameter default_layout libata/ahci: Fix PCS quirk application blk-rq-qos: fix first node deletion of rq_qos_del() blkcg: Fix multiple bugs in blkcg_activate_policy() io_uring: consider the overflow of sequence for timeout req nvme-tcp: fix possible leakage during error flow nvmet-loop: fix possible leakage during error flow block: Fix elv_support_iosched() nvme-tcp: Initialize sk->sk_ll_usec only with NET_RX_BUSY_POLL nvme: Wait for reset state when required nvme: Prevent resets during paused controller state nvme: Restart request timers in resetting state nvme: Remove ADMIN_ONLY state nvme-pci: Free tagset if no IO queues nvme: retain split access workaround for capability reads nvme: fix possible deadlock when nvme_update_formats fails
Diffstat (limited to 'fs')
-rw-r--r--fs/io_uring.c84
1 files changed, 60 insertions, 24 deletions
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 76fdbe84aff5..67dbe0201e0d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -322,6 +322,8 @@ struct io_kiocb {
#define REQ_F_FAIL_LINK 256 /* fail rest of links */
#define REQ_F_SHADOW_DRAIN 512 /* link-drain shadow req */
#define REQ_F_TIMEOUT 1024 /* timeout request */
+#define REQ_F_ISREG 2048 /* regular file */
+#define REQ_F_MUST_PUNT 4096 /* must be punted even for NONBLOCK */
u64 user_data;
u32 result;
u32 sequence;
@@ -914,26 +916,26 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
return ret;
}
-static void kiocb_end_write(struct kiocb *kiocb)
+static void kiocb_end_write(struct io_kiocb *req)
{
- if (kiocb->ki_flags & IOCB_WRITE) {
- struct inode *inode = file_inode(kiocb->ki_filp);
+ /*
+ * Tell lockdep we inherited freeze protection from submission
+ * thread.
+ */
+ if (req->flags & REQ_F_ISREG) {
+ struct inode *inode = file_inode(req->file);
- /*
- * Tell lockdep we inherited freeze protection from submission
- * thread.
- */
- if (S_ISREG(inode->i_mode))
- __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
- file_end_write(kiocb->ki_filp);
+ __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
}
+ file_end_write(req->file);
}
static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
{
struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
- kiocb_end_write(kiocb);
+ if (kiocb->ki_flags & IOCB_WRITE)
+ kiocb_end_write(req);
if ((req->flags & REQ_F_LINK) && res != req->result)
req->flags |= REQ_F_FAIL_LINK;
@@ -945,7 +947,8 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
{
struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
- kiocb_end_write(kiocb);
+ if (kiocb->ki_flags & IOCB_WRITE)
+ kiocb_end_write(req);
if ((req->flags & REQ_F_LINK) && res != req->result)
req->flags |= REQ_F_FAIL_LINK;
@@ -1059,8 +1062,17 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
if (!req->file)
return -EBADF;
- if (force_nonblock && !io_file_supports_async(req->file))
- force_nonblock = false;
+ if (S_ISREG(file_inode(req->file)->i_mode))
+ req->flags |= REQ_F_ISREG;
+
+ /*
+ * If the file doesn't support async, mark it as REQ_F_MUST_PUNT so
+ * we know to async punt it even if it was opened O_NONBLOCK
+ */
+ if (force_nonblock && !io_file_supports_async(req->file)) {
+ req->flags |= REQ_F_MUST_PUNT;
+ return -EAGAIN;
+ }
kiocb->ki_pos = READ_ONCE(sqe->off);
kiocb->ki_flags = iocb_flags(kiocb->ki_filp);
@@ -1081,7 +1093,8 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
return ret;
/* don't allow async punt if RWF_NOWAIT was requested */
- if (kiocb->ki_flags & IOCB_NOWAIT)
+ if ((kiocb->ki_flags & IOCB_NOWAIT) ||
+ (req->file->f_flags & O_NONBLOCK))
req->flags |= REQ_F_NOWAIT;
if (force_nonblock)
@@ -1382,7 +1395,9 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
* need async punt anyway, so it's more efficient to do it
* here.
*/
- if (force_nonblock && ret2 > 0 && ret2 < read_size)
+ if (force_nonblock && !(req->flags & REQ_F_NOWAIT) &&
+ (req->flags & REQ_F_ISREG) &&
+ ret2 > 0 && ret2 < read_size)
ret2 = -EAGAIN;
/* Catch -EAGAIN return for forced non-blocking submission */
if (!force_nonblock || ret2 != -EAGAIN) {
@@ -1447,7 +1462,7 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
* released so that it doesn't complain about the held lock when
* we return to userspace.
*/
- if (S_ISREG(file_inode(file)->i_mode)) {
+ if (req->flags & REQ_F_ISREG) {
__sb_start_write(file_inode(file)->i_sb,
SB_FREEZE_WRITE, true);
__sb_writers_release(file_inode(file)->i_sb,
@@ -1884,7 +1899,7 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- unsigned count, req_dist, tail_index;
+ unsigned count;
struct io_ring_ctx *ctx = req->ctx;
struct list_head *entry;
struct timespec64 ts;
@@ -1907,21 +1922,36 @@ static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe)
count = 1;
req->sequence = ctx->cached_sq_head + count - 1;
+ /* reuse req->submit.sequence to store the count */
+ req->submit.sequence = count;
req->flags |= REQ_F_TIMEOUT;
/*
* Insertion sort, ensuring the first entry in the list is always
* the one we need first.
*/
- tail_index = ctx->cached_cq_tail - ctx->rings->sq_dropped;
- req_dist = req->sequence - tail_index;
spin_lock_irq(&ctx->completion_lock);
list_for_each_prev(entry, &ctx->timeout_list) {
struct io_kiocb *nxt = list_entry(entry, struct io_kiocb, list);
- unsigned dist;
+ unsigned nxt_sq_head;
+ long long tmp, tmp_nxt;
- dist = nxt->sequence - tail_index;
- if (req_dist >= dist)
+ /*
+ * Since cached_sq_head + count - 1 can overflow, use type long
+ * long to store it.
+ */
+ tmp = (long long)ctx->cached_sq_head + count - 1;
+ nxt_sq_head = nxt->sequence - nxt->submit.sequence + 1;
+ tmp_nxt = (long long)nxt_sq_head + nxt->submit.sequence - 1;
+
+ /*
+ * cached_sq_head may overflow, but it will never overflow twice
+ * while some timeout req is still valid.
+ */
+ if (ctx->cached_sq_head < nxt_sq_head)
+ tmp += UINT_MAX;
+
+ if (tmp >= tmp_nxt)
break;
}
list_add(&req->list, entry);
@@ -2267,7 +2297,13 @@ static int __io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
int ret;
ret = __io_submit_sqe(ctx, req, s, force_nonblock);
- if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
+
+ /*
+ * We async punt it if the file wasn't marked NOWAIT, or if the file
+ * doesn't support non-blocking read/write attempts
+ */
+ if (ret == -EAGAIN && (!(req->flags & REQ_F_NOWAIT) ||
+ (req->flags & REQ_F_MUST_PUNT))) {
struct io_uring_sqe *sqe_copy;
sqe_copy = kmemdup(s->sqe, sizeof(*sqe_copy), GFP_KERNEL);