From dccc587f6c07ccc734588226fdf62f685558e89f Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 12 Apr 2020 02:05:01 +0300 Subject: io_uring: remove obsolete @mm_fault If io_submit_sqes() can't grab an mm, it fails and exits right away. There is no need to track the fact of the failure. Remove @mm_fault. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/io_uring.c b/fs/io_uring.c index 5190bfb6a665..81532479c857 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5818,7 +5818,6 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr, struct io_submit_state state, *statep = NULL; struct io_kiocb *link = NULL; int i, submitted = 0; - bool mm_fault = false; /* if we have a backlog and couldn't flush it all, return BUSY */ if (test_bit(0, &ctx->sq_check_overflow)) { @@ -5872,8 +5871,7 @@ fail_req: } if (io_op_defs[req->opcode].needs_mm && !*mm) { - mm_fault = mm_fault || !mmget_not_zero(ctx->sqo_mm); - if (unlikely(mm_fault)) { + if (unlikely(!mmget_not_zero(ctx->sqo_mm))) { err = -EFAULT; goto fail_req; } -- cgit v1.2.3-59-g8ed1b From bf9c2f1cdcc718b6d2d41172f6ca005fe22cc7ff Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 12 Apr 2020 02:05:02 +0300 Subject: io_uring: track mm through current->mm As a preparation for extracting request init bits, remove self-coded mm tracking from io_submit_sqes(), but rely on current->mm. It's more convenient, than passing this piece of state in other functions. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/io_uring.c b/fs/io_uring.c index 81532479c857..f7825d3de400 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5812,8 +5812,7 @@ static void io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, } static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr, - struct file *ring_file, int ring_fd, - struct mm_struct **mm, bool async) + struct file *ring_file, int ring_fd, bool async) { struct io_submit_state state, *statep = NULL; struct io_kiocb *link = NULL; @@ -5870,13 +5869,12 @@ fail_req: break; } - if (io_op_defs[req->opcode].needs_mm && !*mm) { + if (io_op_defs[req->opcode].needs_mm && !current->mm) { if (unlikely(!mmget_not_zero(ctx->sqo_mm))) { err = -EFAULT; goto fail_req; } use_mm(ctx->sqo_mm); - *mm = ctx->sqo_mm; } req->needs_fixed_file = async; @@ -5902,10 +5900,19 @@ fail_req: return submitted; } +static inline void io_sq_thread_drop_mm(struct io_ring_ctx *ctx) +{ + struct mm_struct *mm = current->mm; + + if (mm) { + unuse_mm(mm); + mmput(mm); + } +} + static int io_sq_thread(void *data) { struct io_ring_ctx *ctx = data; - struct mm_struct *cur_mm = NULL; const struct cred *old_cred; mm_segment_t old_fs; DEFINE_WAIT(wait); @@ -5946,11 +5953,7 @@ static int io_sq_thread(void *data) * adding ourselves to the waitqueue, as the unuse/drop * may sleep. */ - if (cur_mm) { - unuse_mm(cur_mm); - mmput(cur_mm); - cur_mm = NULL; - } + io_sq_thread_drop_mm(ctx); /* * We're polling. If we're within the defined idle @@ -6014,7 +6017,7 @@ static int io_sq_thread(void *data) } mutex_lock(&ctx->uring_lock); - ret = io_submit_sqes(ctx, to_submit, NULL, -1, &cur_mm, true); + ret = io_submit_sqes(ctx, to_submit, NULL, -1, true); mutex_unlock(&ctx->uring_lock); timeout = jiffies + ctx->sq_thread_idle; } @@ -6023,10 +6026,7 @@ static int io_sq_thread(void *data) task_work_run(); set_fs(old_fs); - if (cur_mm) { - unuse_mm(cur_mm); - mmput(cur_mm); - } + io_sq_thread_drop_mm(ctx); revert_creds(old_cred); kthread_parkme(); @@ -7507,13 +7507,8 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, wake_up(&ctx->sqo_wait); submitted = to_submit; } else if (to_submit) { - struct mm_struct *cur_mm; - mutex_lock(&ctx->uring_lock); - /* already have mm, so io_submit_sqes() won't try to grab it */ - cur_mm = ctx->sqo_mm; - submitted = io_submit_sqes(ctx, to_submit, f.file, fd, - &cur_mm, false); + submitted = io_submit_sqes(ctx, to_submit, f.file, fd, false); mutex_unlock(&ctx->uring_lock); if (submitted != to_submit) -- cgit v1.2.3-59-g8ed1b From 1d4240cc9e7bb101dac58f30283fa24a809f5606 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 12 Apr 2020 02:05:03 +0300 Subject: io_uring: early submission req fail code Having only one place for cleaning up a request after a link assembly/ submission failure will play handy in the future. At least it allows to remove duplicated cleanup sequence. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 50 +++++++++++++++++++------------------------------- 1 file changed, 19 insertions(+), 31 deletions(-) (limited to 'fs') diff --git a/fs/io_uring.c b/fs/io_uring.c index f7825d3de400..ff10bd49a619 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5608,7 +5608,7 @@ static inline void io_queue_link_head(struct io_kiocb *req) IOSQE_IO_HARDLINK | IOSQE_ASYNC | \ IOSQE_BUFFER_SELECT) -static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, +static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, struct io_submit_state *state, struct io_kiocb **link) { struct io_ring_ctx *ctx = req->ctx; @@ -5618,24 +5618,18 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, sqe_flags = READ_ONCE(sqe->flags); /* enforce forwards compatibility on users */ - if (unlikely(sqe_flags & ~SQE_VALID_FLAGS)) { - ret = -EINVAL; - goto err_req; - } + if (unlikely(sqe_flags & ~SQE_VALID_FLAGS)) + return -EINVAL; if ((sqe_flags & IOSQE_BUFFER_SELECT) && - !io_op_defs[req->opcode].buffer_select) { - ret = -EOPNOTSUPP; - goto err_req; - } + !io_op_defs[req->opcode].buffer_select) + return -EOPNOTSUPP; id = READ_ONCE(sqe->personality); if (id) { req->work.creds = idr_find(&ctx->personality_idr, id); - if (unlikely(!req->work.creds)) { - ret = -EINVAL; - goto err_req; - } + if (unlikely(!req->work.creds)) + return -EINVAL; get_cred(req->work.creds); } @@ -5646,12 +5640,8 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, fd = READ_ONCE(sqe->fd); ret = io_req_set_file(state, req, fd, sqe_flags); - if (unlikely(ret)) { -err_req: - io_cqring_add_event(req, ret); - io_double_put_req(req); - return false; - } + if (unlikely(ret)) + return ret; /* * If we already have a head request, queue this one for async @@ -5674,16 +5664,14 @@ err_req: head->flags |= REQ_F_IO_DRAIN; ctx->drain_next = 1; } - if (io_alloc_async_ctx(req)) { - ret = -EAGAIN; - goto err_req; - } + if (io_alloc_async_ctx(req)) + return -EAGAIN; ret = io_req_defer_prep(req, sqe); if (ret) { /* fail even hard links since we don't submit */ head->flags |= REQ_F_FAIL_LINK; - goto err_req; + return ret; } trace_io_uring_link(ctx, req, head); list_add_tail(&req->link_list, &head->link_list); @@ -5702,10 +5690,9 @@ err_req: req->flags |= REQ_F_LINK; INIT_LIST_HEAD(&req->link_list); - if (io_alloc_async_ctx(req)) { - ret = -EAGAIN; - goto err_req; - } + if (io_alloc_async_ctx(req)) + return -EAGAIN; + ret = io_req_defer_prep(req, sqe); if (ret) req->flags |= REQ_F_FAIL_LINK; @@ -5715,7 +5702,7 @@ err_req: } } - return true; + return 0; } /* @@ -5880,8 +5867,9 @@ fail_req: req->needs_fixed_file = async; trace_io_uring_submit_sqe(ctx, req->opcode, req->user_data, true, async); - if (!io_submit_sqe(req, sqe, statep, &link)) - break; + err = io_submit_sqe(req, sqe, statep, &link); + if (err) + goto fail_req; } if (unlikely(submitted != nr)) { -- cgit v1.2.3-59-g8ed1b From dea3b49c7fb09b4f6b6a574c0485ffeb9df7b69c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 12 Apr 2020 02:05:04 +0300 Subject: io_uring: keep all sqe->flags in req->flags It's a good idea to not read sqe->flags twice, as it's prone to security bugs. Instead of passing it around, embeed them in req->flags. It's already so except for IOSQE_IO_LINK. 1. rename former REQ_F_LINK -> REQ_F_LINK_HEAD 2. introduce and copy REQ_F_LINK, which mimics IO_IOSQE_LINK And leave req_set_fail_links() using new REQ_F_LINK, because it's more sensible. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/io_uring.c b/fs/io_uring.c index ff10bd49a619..b0e1bdfe0a43 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -508,6 +508,7 @@ enum { REQ_F_FORCE_ASYNC_BIT = IOSQE_ASYNC_BIT, REQ_F_BUFFER_SELECT_BIT = IOSQE_BUFFER_SELECT_BIT, + REQ_F_LINK_HEAD_BIT, REQ_F_LINK_NEXT_BIT, REQ_F_FAIL_LINK_BIT, REQ_F_INFLIGHT_BIT, @@ -543,6 +544,8 @@ enum { /* IOSQE_BUFFER_SELECT */ REQ_F_BUFFER_SELECT = BIT(REQ_F_BUFFER_SELECT_BIT), + /* head of a link */ + REQ_F_LINK_HEAD = BIT(REQ_F_LINK_HEAD_BIT), /* already grabbed next link */ REQ_F_LINK_NEXT = BIT(REQ_F_LINK_NEXT_BIT), /* fail rest of links */ @@ -1437,7 +1440,7 @@ static bool io_link_cancel_timeout(struct io_kiocb *req) if (ret != -1) { io_cqring_fill_event(req, -ECANCELED); io_commit_cqring(ctx); - req->flags &= ~REQ_F_LINK; + req->flags &= ~REQ_F_LINK_HEAD; io_put_req(req); return true; } @@ -1473,7 +1476,7 @@ static void io_req_link_next(struct io_kiocb *req, struct io_kiocb **nxtptr) list_del_init(&req->link_list); if (!list_empty(&nxt->link_list)) - nxt->flags |= REQ_F_LINK; + nxt->flags |= REQ_F_LINK_HEAD; *nxtptr = nxt; break; } @@ -1484,7 +1487,7 @@ static void io_req_link_next(struct io_kiocb *req, struct io_kiocb **nxtptr) } /* - * Called if REQ_F_LINK is set, and we fail the head request + * Called if REQ_F_LINK_HEAD is set, and we fail the head request */ static void io_fail_links(struct io_kiocb *req) { @@ -1517,7 +1520,7 @@ static void io_fail_links(struct io_kiocb *req) static void io_req_find_next(struct io_kiocb *req, struct io_kiocb **nxt) { - if (likely(!(req->flags & REQ_F_LINK))) + if (likely(!(req->flags & REQ_F_LINK_HEAD))) return; /* @@ -1669,7 +1672,7 @@ static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx) static inline bool io_req_multi_free(struct req_batch *rb, struct io_kiocb *req) { - if ((req->flags & REQ_F_LINK) || io_is_fallback_req(req)) + if ((req->flags & REQ_F_LINK_HEAD) || io_is_fallback_req(req)) return false; if (!(req->flags & REQ_F_FIXED_FILE) || req->io) @@ -2562,7 +2565,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock) req->result = 0; io_size = ret; - if (req->flags & REQ_F_LINK) + if (req->flags & REQ_F_LINK_HEAD) req->result = io_size; /* @@ -2653,7 +2656,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock) req->result = 0; io_size = ret; - if (req->flags & REQ_F_LINK) + if (req->flags & REQ_F_LINK_HEAD) req->result = io_size; /* @@ -5476,7 +5479,7 @@ static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req) { struct io_kiocb *nxt; - if (!(req->flags & REQ_F_LINK)) + if (!(req->flags & REQ_F_LINK_HEAD)) return NULL; /* for polled retry, if flag is set, we already went through here */ if (req->flags & REQ_F_POLLED) @@ -5636,7 +5639,7 @@ static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, /* same numerical values with corresponding REQ_F_*, safe to copy */ req->flags |= sqe_flags & (IOSQE_IO_DRAIN | IOSQE_IO_HARDLINK | IOSQE_ASYNC | IOSQE_FIXED_FILE | - IOSQE_BUFFER_SELECT); + IOSQE_BUFFER_SELECT | IOSQE_IO_LINK); fd = READ_ONCE(sqe->fd); ret = io_req_set_file(state, req, fd, sqe_flags); @@ -5687,7 +5690,7 @@ static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, req->ctx->drain_next = 0; } if (sqe_flags & (IOSQE_IO_LINK|IOSQE_IO_HARDLINK)) { - req->flags |= REQ_F_LINK; + req->flags |= REQ_F_LINK_HEAD; INIT_LIST_HEAD(&req->link_list); if (io_alloc_async_ctx(req)) -- cgit v1.2.3-59-g8ed1b From ef4ff581102a917a69877feca2e5347e2f3e458c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sun, 12 Apr 2020 02:05:05 +0300 Subject: io_uring: move all request init code in one place Requests initialisation is scattered across several functions, namely io_init_req(), io_submit_sqes(), io_submit_sqe(). Put it in io_init_req() for better data locality and code clarity. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 104 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 52 insertions(+), 52 deletions(-) (limited to 'fs') diff --git a/fs/io_uring.c b/fs/io_uring.c index b0e1bdfe0a43..c0cf57764329 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5607,44 +5607,11 @@ static inline void io_queue_link_head(struct io_kiocb *req) io_queue_sqe(req, NULL); } -#define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK| \ - IOSQE_IO_HARDLINK | IOSQE_ASYNC | \ - IOSQE_BUFFER_SELECT) - static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, struct io_submit_state *state, struct io_kiocb **link) { struct io_ring_ctx *ctx = req->ctx; - unsigned int sqe_flags; - int ret, id, fd; - - sqe_flags = READ_ONCE(sqe->flags); - - /* enforce forwards compatibility on users */ - if (unlikely(sqe_flags & ~SQE_VALID_FLAGS)) - return -EINVAL; - - if ((sqe_flags & IOSQE_BUFFER_SELECT) && - !io_op_defs[req->opcode].buffer_select) - return -EOPNOTSUPP; - - id = READ_ONCE(sqe->personality); - if (id) { - req->work.creds = idr_find(&ctx->personality_idr, id); - if (unlikely(!req->work.creds)) - return -EINVAL; - get_cred(req->work.creds); - } - - /* same numerical values with corresponding REQ_F_*, safe to copy */ - req->flags |= sqe_flags & (IOSQE_IO_DRAIN | IOSQE_IO_HARDLINK | - IOSQE_ASYNC | IOSQE_FIXED_FILE | - IOSQE_BUFFER_SELECT | IOSQE_IO_LINK); - - fd = READ_ONCE(sqe->fd); - ret = io_req_set_file(state, req, fd, sqe_flags); - if (unlikely(ret)) - return ret; + int ret; /* * If we already have a head request, queue this one for async @@ -5663,7 +5630,7 @@ static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, * next after the link request. The last one is done via * drain_next flag to persist the effect across calls. */ - if (sqe_flags & IOSQE_IO_DRAIN) { + if (req->flags & REQ_F_IO_DRAIN) { head->flags |= REQ_F_IO_DRAIN; ctx->drain_next = 1; } @@ -5680,16 +5647,16 @@ static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, list_add_tail(&req->link_list, &head->link_list); /* last request of a link, enqueue the link */ - if (!(sqe_flags & (IOSQE_IO_LINK|IOSQE_IO_HARDLINK))) { + if (!(req->flags & (REQ_F_LINK | REQ_F_HARDLINK))) { io_queue_link_head(head); *link = NULL; } } else { if (unlikely(ctx->drain_next)) { req->flags |= REQ_F_IO_DRAIN; - req->ctx->drain_next = 0; + ctx->drain_next = 0; } - if (sqe_flags & (IOSQE_IO_LINK|IOSQE_IO_HARDLINK)) { + if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) { req->flags |= REQ_F_LINK_HEAD; INIT_LIST_HEAD(&req->link_list); @@ -5779,9 +5746,17 @@ static inline void io_consume_sqe(struct io_ring_ctx *ctx) ctx->cached_sq_head++; } -static void io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, - const struct io_uring_sqe *sqe) +#define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK| \ + IOSQE_IO_HARDLINK | IOSQE_ASYNC | \ + IOSQE_BUFFER_SELECT) + +static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, + const struct io_uring_sqe *sqe, + struct io_submit_state *state, bool async) { + unsigned int sqe_flags; + int id, fd; + /* * All io need record the previous position, if LINK vs DARIN, * it can be used to mark the position of the first IO in the @@ -5798,7 +5773,42 @@ static void io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, refcount_set(&req->refs, 2); req->task = NULL; req->result = 0; + req->needs_fixed_file = async; INIT_IO_WORK(&req->work, io_wq_submit_work); + + if (unlikely(req->opcode >= IORING_OP_LAST)) + return -EINVAL; + + if (io_op_defs[req->opcode].needs_mm && !current->mm) { + if (unlikely(!mmget_not_zero(ctx->sqo_mm))) + return -EFAULT; + use_mm(ctx->sqo_mm); + } + + sqe_flags = READ_ONCE(sqe->flags); + /* enforce forwards compatibility on users */ + if (unlikely(sqe_flags & ~SQE_VALID_FLAGS)) + return -EINVAL; + + if ((sqe_flags & IOSQE_BUFFER_SELECT) && + !io_op_defs[req->opcode].buffer_select) + return -EOPNOTSUPP; + + id = READ_ONCE(sqe->personality); + if (id) { + req->work.creds = idr_find(&ctx->personality_idr, id); + if (unlikely(!req->work.creds)) + return -EINVAL; + get_cred(req->work.creds); + } + + /* same numerical values with corresponding REQ_F_*, safe to copy */ + req->flags |= sqe_flags & (IOSQE_IO_DRAIN | IOSQE_IO_HARDLINK | + IOSQE_ASYNC | IOSQE_FIXED_FILE | + IOSQE_BUFFER_SELECT | IOSQE_IO_LINK); + + fd = READ_ONCE(sqe->fd); + return io_req_set_file(state, req, fd, sqe_flags); } static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr, @@ -5846,28 +5856,18 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr, break; } - io_init_req(ctx, req, sqe); + err = io_init_req(ctx, req, sqe, statep, async); io_consume_sqe(ctx); /* will complete beyond this point, count as submitted */ submitted++; - if (unlikely(req->opcode >= IORING_OP_LAST)) { - err = -EINVAL; + if (unlikely(err)) { fail_req: io_cqring_add_event(req, err); io_double_put_req(req); break; } - if (io_op_defs[req->opcode].needs_mm && !current->mm) { - if (unlikely(!mmget_not_zero(ctx->sqo_mm))) { - err = -EFAULT; - goto fail_req; - } - use_mm(ctx->sqo_mm); - } - - req->needs_fixed_file = async; trace_io_uring_submit_sqe(ctx, req->opcode, req->user_data, true, async); err = io_submit_sqe(req, sqe, statep, &link); -- cgit v1.2.3-59-g8ed1b From b1f573bd15fda2e19ea66a4d26fae8be1b12791d Mon Sep 17 00:00:00 2001 From: Xiaoguang Wang Date: Sun, 12 Apr 2020 14:50:54 +0800 Subject: io_uring: restore req->work when canceling poll request When running liburing test case 'accept', I got below warning: RED: Invalid credentials RED: At include/linux/cred.h:285 RED: Specified credentials: 00000000d02474a0 RED: ->magic=4b, put_addr=000000005b4f46e9 RED: ->usage=-1699227648, subscr=-25693 RED: ->*uid = { 256,-25693,-25693,65534 } RED: ->*gid = { 0,-1925859360,-1789740800,-1827028688 } RED: ->security is 00000000258c136e eneral protection fault, probably for non-canonical address 0xdead4ead00000000: 0000 [#1] SMP PTI PU: 21 PID: 2037 Comm: accept Not tainted 5.6.0+ #318 ardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.1-0-g0551a4be2c-prebuilt.qemu-project.org 04/01/2014 IP: 0010:dump_invalid_creds+0x16f/0x184 ode: 48 8b 83 88 00 00 00 48 3d ff 0f 00 00 76 29 48 89 c2 81 e2 00 ff ff ff 48 81 fa 00 6b 6b 6b 74 17 5b 48 c7 c7 4b b1 10 8e 5d <8b> 50 04 41 5c 8b 30 41 5d e9 67 e3 04 00 5b 5d 41 5c 41 5d c3 0f SP: 0018:ffffacc1039dfb38 EFLAGS: 00010087 AX: dead4ead00000000 RBX: ffff9ba39319c100 RCX: 0000000000000007 DX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff8e10b14b BP: ffffffff8e108476 R08: 0000000000000000 R09: 0000000000000001 10: 0000000000000000 R11: ffffacc1039df9e5 R12: 000000009552b900 13: 000000009319c130 R14: ffff9ba39319c100 R15: 0000000000000246 S: 00007f96b2bfc4c0(0000) GS:ffff9ba39f340000(0000) knlGS:0000000000000000 S: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 R2: 0000000000401870 CR3: 00000007db7a4000 CR4: 00000000000006e0 all Trace: __invalid_creds+0x48/0x4a __io_req_aux_free+0x2e8/0x3b0 ? io_poll_remove_one+0x2a/0x1d0 __io_free_req+0x18/0x200 io_free_req+0x31/0x350 io_poll_remove_one+0x17f/0x1d0 io_poll_cancel.isra.80+0x6c/0x80 io_async_find_and_cancel+0x111/0x120 io_issue_sqe+0x181/0x10e0 ? __lock_acquire+0x552/0xae0 ? lock_acquire+0x8e/0x310 ? fs_reclaim_acquire.part.97+0x5/0x30 __io_queue_sqe.part.100+0xc4/0x580 ? io_submit_sqes+0x751/0xbd0 ? rcu_read_lock_sched_held+0x32/0x40 io_submit_sqes+0x9ba/0xbd0 ? __x64_sys_io_uring_enter+0x2b2/0x460 ? __x64_sys_io_uring_enter+0xaf/0x460 ? find_held_lock+0x2d/0x90 ? __x64_sys_io_uring_enter+0x111/0x460 __x64_sys_io_uring_enter+0x2d7/0x460 do_syscall_64+0x5a/0x230 entry_SYSCALL_64_after_hwframe+0x49/0xb3 After looking into codes, it turns out that this issue is because we didn't restore the req->work, which is changed in io_arm_poll_handler(), req->work is a union with below struct: struct { struct callback_head task_work; struct hlist_node hash_node; struct async_poll *apoll; }; If we forget to restore, members in struct io_wq_work would be invalid, restore the req->work to fix this issue. Signed-off-by: Xiaoguang Wang Get rid of not needed 'need_restore' variable. Signed-off-by: Jens Axboe --- fs/io_uring.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs') diff --git a/fs/io_uring.c b/fs/io_uring.c index c0cf57764329..68a678a0056b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4318,11 +4318,13 @@ static bool __io_poll_remove_one(struct io_kiocb *req, static bool io_poll_remove_one(struct io_kiocb *req) { + struct async_poll *apoll = NULL; bool do_complete; if (req->opcode == IORING_OP_POLL_ADD) { do_complete = __io_poll_remove_one(req, &req->poll); } else { + apoll = req->apoll; /* non-poll requests have submit ref still */ do_complete = __io_poll_remove_one(req, &req->apoll->poll); if (do_complete) @@ -4331,6 +4333,14 @@ static bool io_poll_remove_one(struct io_kiocb *req) hash_del(&req->hash_node); + if (apoll) { + /* + * restore ->work because we need to call io_req_work_drop_env. + */ + memcpy(&req->work, &apoll->work, sizeof(req->work)); + kfree(apoll); + } + if (do_complete) { io_cqring_fill_event(req, -ECANCELED); io_commit_cqring(req->ctx); -- cgit v1.2.3-59-g8ed1b From 88357580854aab29d27e1a443575caaedd081612 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 12 Apr 2020 21:12:49 -0600 Subject: io_uring: correct O_NONBLOCK check for splice punt The splice file punt check uses file->f_mode to check for O_NONBLOCK, but it should be checking file->f_flags. This leads to punting even for files that have O_NONBLOCK set, which isn't necessary. This equates to checking for FMODE_PATH, which will never be set on the fd in question. Fixes: 7d67af2c0134 ("io_uring: add splice(2) support") Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/io_uring.c b/fs/io_uring.c index 68a678a0056b..0d1b5d5f1251 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2763,7 +2763,7 @@ static bool io_splice_punt(struct file *file) return false; if (!io_file_supports_async(file)) return true; - return !(file->f_mode & O_NONBLOCK); + return !(file->f_flags & O_NONBLOCK); } static int io_splice(struct io_kiocb *req, bool force_nonblock) -- cgit v1.2.3-59-g8ed1b From 74ce6ce43d4fc6ce15efb21378d9ef26125c298b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 13 Apr 2020 11:09:12 -0600 Subject: io_uring: check for need to re-wait in polled async handling We added this for just the regular poll requests in commit a6ba632d2c24 ("io_uring: retry poll if we got woken with non-matching mask"), we should do the same for the poll handler used pollable async requests. Move the re-wait check and arm into a helper, and call it from io_async_task_func() as well. Signed-off-by: Jens Axboe --- fs/io_uring.c | 43 +++++++++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/io_uring.c b/fs/io_uring.c index 0d1b5d5f1251..7b41f6231955 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4156,6 +4156,26 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll, return 1; } +static bool io_poll_rewait(struct io_kiocb *req, struct io_poll_iocb *poll) + __acquires(&req->ctx->completion_lock) +{ + struct io_ring_ctx *ctx = req->ctx; + + if (!req->result && !READ_ONCE(poll->canceled)) { + struct poll_table_struct pt = { ._key = poll->events }; + + req->result = vfs_poll(req->file, &pt) & poll->events; + } + + spin_lock_irq(&ctx->completion_lock); + if (!req->result && !READ_ONCE(poll->canceled)) { + add_wait_queue(poll->head, &poll->wait); + return true; + } + + return false; +} + static void io_async_task_func(struct callback_head *cb) { struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work); @@ -4164,14 +4184,16 @@ static void io_async_task_func(struct callback_head *cb) trace_io_uring_task_run(req->ctx, req->opcode, req->user_data); - WARN_ON_ONCE(!list_empty(&req->apoll->poll.wait.entry)); - - if (hash_hashed(&req->hash_node)) { - spin_lock_irq(&ctx->completion_lock); - hash_del(&req->hash_node); + if (io_poll_rewait(req, &apoll->poll)) { spin_unlock_irq(&ctx->completion_lock); + return; } + if (hash_hashed(&req->hash_node)) + hash_del(&req->hash_node); + + spin_unlock_irq(&ctx->completion_lock); + /* restore ->work in case we need to retry again */ memcpy(&req->work, &apoll->work, sizeof(req->work)); @@ -4436,18 +4458,11 @@ static void io_poll_task_handler(struct io_kiocb *req, struct io_kiocb **nxt) struct io_ring_ctx *ctx = req->ctx; struct io_poll_iocb *poll = &req->poll; - if (!req->result && !READ_ONCE(poll->canceled)) { - struct poll_table_struct pt = { ._key = poll->events }; - - req->result = vfs_poll(req->file, &pt) & poll->events; - } - - spin_lock_irq(&ctx->completion_lock); - if (!req->result && !READ_ONCE(poll->canceled)) { - add_wait_queue(poll->head, &poll->wait); + if (io_poll_rewait(req, poll)) { spin_unlock_irq(&ctx->completion_lock); return; } + hash_del(&req->hash_node); io_poll_complete(req, req->result, 0); req->flags |= REQ_F_COMP_LOCKED; -- cgit v1.2.3-59-g8ed1b From 2bae047ec9576da72d5003487de0bb93e747fff7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 13 Apr 2020 11:16:34 -0600 Subject: io_uring: io_async_task_func() should check and honor cancelation If the request has been marked as canceled, don't try and issue it. Instead just fill a canceled event and finish the request. Signed-off-by: Jens Axboe --- fs/io_uring.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'fs') diff --git a/fs/io_uring.c b/fs/io_uring.c index 7b41f6231955..aac54772e12e 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4181,6 +4181,7 @@ static void io_async_task_func(struct callback_head *cb) struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work); struct async_poll *apoll = req->apoll; struct io_ring_ctx *ctx = req->ctx; + bool canceled; trace_io_uring_task_run(req->ctx, req->opcode, req->user_data); @@ -4192,8 +4193,22 @@ static void io_async_task_func(struct callback_head *cb) if (hash_hashed(&req->hash_node)) hash_del(&req->hash_node); + canceled = READ_ONCE(apoll->poll.canceled); + if (canceled) { + io_cqring_fill_event(req, -ECANCELED); + io_commit_cqring(ctx); + } + spin_unlock_irq(&ctx->completion_lock); + if (canceled) { + kfree(apoll); + io_cqring_ev_posted(ctx); + req_set_fail_links(req); + io_put_req(req); + return; + } + /* restore ->work in case we need to retry again */ memcpy(&req->work, &apoll->work, sizeof(req->work)); -- cgit v1.2.3-59-g8ed1b From 8e2e1faf28b3e66430f55f0b0ee83370ecc150af Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 13 Apr 2020 17:05:14 -0600 Subject: io_uring: only post events in io_poll_remove_all() if we completed some MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit syzbot reports this crash: BUG: unable to handle page fault for address: ffffffffffffffe8 PGD f96e17067 P4D f96e17067 PUD f96e19067 PMD 0 Oops: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI CPU: 55 PID: 211750 Comm: trinity-c127 Tainted: G B L 5.7.0-rc1-next-20200413 #4 Hardware name: HP ProLiant DL380 Gen9/ProLiant DL380 Gen9, BIOS P89 04/12/2017 RIP: 0010:__wake_up_common+0x98/0x290 el/sched/wait.c:87 Code: 40 4d 8d 78 e8 49 8d 7f 18 49 39 fd 0f 84 80 00 00 00 e8 6b bd 2b 00 49 8b 5f 18 45 31 e4 48 83 eb 18 4c 89 ff e8 08 bc 2b 00 <45> 8b 37 41 f6 c6 04 75 71 49 8d 7f 10 e8 46 bd 2b 00 49 8b 47 10 RSP: 0018:ffffc9000adbfaf0 EFLAGS: 00010046 RAX: 0000000000000000 RBX: ffffffffffffffe8 RCX: ffffffffaa9636b8 RDX: 0000000000000003 RSI: dffffc0000000000 RDI: ffffffffffffffe8 RBP: ffffc9000adbfb40 R08: fffffbfff582c5fd R09: fffffbfff582c5fd R10: ffffffffac162fe3 R11: fffffbfff582c5fc R12: 0000000000000000 R13: ffff888ef82b0960 R14: ffffc9000adbfb80 R15: ffffffffffffffe8 FS: 00007fdcba4c4740(0000) GS:ffff889033780000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffffffffe8 CR3: 0000000f776a0004 CR4: 00000000001606e0 Call Trace: __wake_up_common_lock+0xea/0x150 ommon_lock at kernel/sched/wait.c:124 ? __wake_up_common+0x290/0x290 ? lockdep_hardirqs_on+0x16/0x2c0 __wake_up+0x13/0x20 io_cqring_ev_posted+0x75/0xe0 v_posted at fs/io_uring.c:1160 io_ring_ctx_wait_and_kill+0x1c0/0x2f0 l at fs/io_uring.c:7305 io_uring_create+0xa8d/0x13b0 ? io_req_defer_prep+0x990/0x990 ? __kasan_check_write+0x14/0x20 io_uring_setup+0xb8/0x130 ? io_uring_create+0x13b0/0x13b0 ? check_flags.part.28+0x220/0x220 ? lockdep_hardirqs_on+0x16/0x2c0 __x64_sys_io_uring_setup+0x31/0x40 do_syscall_64+0xcc/0xaf0 ? syscall_return_slowpath+0x580/0x580 ? lockdep_hardirqs_off+0x1f/0x140 ? entry_SYSCALL_64_after_hwframe+0x3e/0xb3 ? trace_hardirqs_off_caller+0x3a/0x150 ? trace_hardirqs_off_thunk+0x1a/0x1c entry_SYSCALL_64_after_hwframe+0x49/0xb3 RIP: 0033:0x7fdcb9dd76ed Code: 00 c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 6b 57 2c 00 f7 d8 64 89 01 48 RSP: 002b:00007ffe7fd4e4f8 EFLAGS: 00000246 ORIG_RAX: 00000000000001a9 RAX: ffffffffffffffda RBX: 00000000000001a9 RCX: 00007fdcb9dd76ed RDX: fffffffffffffffc RSI: 0000000000000000 RDI: 0000000000005d54 RBP: 00000000000001a9 R08: 0000000e31d3caa7 R09: 0082400004004000 R10: ffffffffffffffff R11: 0000000000000246 R12: 0000000000000002 R13: 00007fdcb842e058 R14: 00007fdcba4c46c0 R15: 00007fdcb842e000 Modules linked in: bridge stp llc nfnetlink cn brd vfat fat ext4 crc16 mbcache jbd2 loop kvm_intel kvm irqbypass intel_cstate intel_uncore dax_pmem intel_rapl_perf dax_pmem_core ip_tables x_tables xfs sd_mod tg3 firmware_class libphy hpsa scsi_transport_sas dm_mirror dm_region_hash dm_log dm_mod [last unloaded: binfmt_misc] CR2: ffffffffffffffe8 ---[ end trace f9502383d57e0e22 ]--- RIP: 0010:__wake_up_common+0x98/0x290 Code: 40 4d 8d 78 e8 49 8d 7f 18 49 39 fd 0f 84 80 00 00 00 e8 6b bd 2b 00 49 8b 5f 18 45 31 e4 48 83 eb 18 4c 89 ff e8 08 bc 2b 00 <45> 8b 37 41 f6 c6 04 75 71 49 8d 7f 10 e8 46 bd 2b 00 49 8b 47 10 RSP: 0018:ffffc9000adbfaf0 EFLAGS: 00010046 RAX: 0000000000000000 RBX: ffffffffffffffe8 RCX: ffffffffaa9636b8 RDX: 0000000000000003 RSI: dffffc0000000000 RDI: ffffffffffffffe8 RBP: ffffc9000adbfb40 R08: fffffbfff582c5fd R09: fffffbfff582c5fd R10: ffffffffac162fe3 R11: fffffbfff582c5fc R12: 0000000000000000 R13: ffff888ef82b0960 R14: ffffc9000adbfb80 R15: ffffffffffffffe8 FS: 00007fdcba4c4740(0000) GS:ffff889033780000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffffffffe8 CR3: 0000000f776a0004 CR4: 00000000001606e0 Kernel panic - not syncing: Fatal exception Kernel Offset: 0x29800000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) ---[ end Kernel panic - not syncing: Fatal exception ]— which is due to error injection (or allocation failure) preventing the rings from being setup. On shutdown, we attempt to remove any pending requests, and for poll request, we call io_cqring_ev_posted() when we've killed poll requests. However, since the rings aren't setup, we won't find any poll requests. Make the calling of io_cqring_ev_posted() dependent on actually having completed requests. This fixes this setup corner case, and removes spurious calls if we remove poll requests and don't find any. Reported-by: Qian Cai Signed-off-by: Jens Axboe --- fs/io_uring.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/io_uring.c b/fs/io_uring.c index aac54772e12e..32cbace58256 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4392,7 +4392,7 @@ static void io_poll_remove_all(struct io_ring_ctx *ctx) { struct hlist_node *tmp; struct io_kiocb *req; - int i; + int posted = 0, i; spin_lock_irq(&ctx->completion_lock); for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) { @@ -4400,11 +4400,12 @@ static void io_poll_remove_all(struct io_ring_ctx *ctx) list = &ctx->cancel_hash[i]; hlist_for_each_entry_safe(req, tmp, list, hash_node) - io_poll_remove_one(req); + posted += io_poll_remove_one(req); } spin_unlock_irq(&ctx->completion_lock); - io_cqring_ev_posted(ctx); + if (posted) + io_cqring_ev_posted(ctx); } static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr) -- cgit v1.2.3-59-g8ed1b From 22cad1585c6bc6caf2688701004cf2af6865cbe0 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 15 Apr 2020 00:39:48 +0300 Subject: io_uring: fix cached_sq_head in io_timeout() io_timeout() can be executed asynchronously by a worker and without holding ctx->uring_lock 1. using ctx->cached_sq_head there is racy there 2. it should count events from a moment of timeout's submission, but not execution Use req->sequence. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/io_uring.c b/fs/io_uring.c index 32cbace58256..9325ac618cf0 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4714,6 +4714,7 @@ static int io_timeout(struct io_kiocb *req) struct io_timeout_data *data; struct list_head *entry; unsigned span = 0; + u32 seq = req->sequence; data = &req->io->timeout; @@ -4730,7 +4731,7 @@ static int io_timeout(struct io_kiocb *req) goto add; } - req->sequence = ctx->cached_sq_head + count - 1; + req->sequence = seq + count; data->seq_offset = count; /* @@ -4740,7 +4741,7 @@ static int io_timeout(struct io_kiocb *req) spin_lock_irq(&ctx->completion_lock); list_for_each_prev(entry, &ctx->timeout_list) { struct io_kiocb *nxt = list_entry(entry, struct io_kiocb, list); - unsigned nxt_sq_head; + unsigned nxt_seq; long long tmp, tmp_nxt; u32 nxt_offset = nxt->io->timeout.seq_offset; @@ -4748,18 +4749,18 @@ static int io_timeout(struct io_kiocb *req) continue; /* - * Since cached_sq_head + count - 1 can overflow, use type long + * Since seq + count can overflow, use type long * long to store it. */ - tmp = (long long)ctx->cached_sq_head + count - 1; - nxt_sq_head = nxt->sequence - nxt_offset + 1; - tmp_nxt = (long long)nxt_sq_head + nxt_offset - 1; + tmp = (long long)seq + count; + nxt_seq = nxt->sequence - nxt_offset; + tmp_nxt = (long long)nxt_seq + nxt_offset; /* * cached_sq_head may overflow, and it will never overflow twice * once there is some timeout req still be valid. */ - if (ctx->cached_sq_head < nxt_sq_head) + if (seq < nxt_seq) tmp += UINT_MAX; if (tmp > tmp_nxt) -- cgit v1.2.3-59-g8ed1b From b55ce732004989c85bf9d858c03e6d477cf9023b Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 15 Apr 2020 00:39:49 +0300 Subject: io_uring: kill already cached timeout.seq_offset req->timeout.count and req->io->timeout.seq_offset store the same value, which is sqe->off. Kill the second one Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/io_uring.c b/fs/io_uring.c index 9325ac618cf0..3fc33ba4855d 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -357,7 +357,6 @@ struct io_timeout_data { struct hrtimer timer; struct timespec64 ts; enum hrtimer_mode mode; - u32 seq_offset; }; struct io_accept { @@ -385,7 +384,7 @@ struct io_timeout { struct file *file; u64 addr; int flags; - unsigned count; + u32 count; }; struct io_rw { @@ -4709,11 +4708,11 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe, static int io_timeout(struct io_kiocb *req) { - unsigned count; struct io_ring_ctx *ctx = req->ctx; struct io_timeout_data *data; struct list_head *entry; unsigned span = 0; + u32 count = req->timeout.count; u32 seq = req->sequence; data = &req->io->timeout; @@ -4723,7 +4722,6 @@ static int io_timeout(struct io_kiocb *req) * timeout event to be satisfied. If it isn't set, then this is * a pure timeout request, sequence isn't used. */ - count = req->timeout.count; if (!count) { req->flags |= REQ_F_TIMEOUT_NOSEQ; spin_lock_irq(&ctx->completion_lock); @@ -4732,7 +4730,6 @@ static int io_timeout(struct io_kiocb *req) } req->sequence = seq + count; - data->seq_offset = count; /* * Insertion sort, ensuring the first entry in the list is always @@ -4743,7 +4740,7 @@ static int io_timeout(struct io_kiocb *req) struct io_kiocb *nxt = list_entry(entry, struct io_kiocb, list); unsigned nxt_seq; long long tmp, tmp_nxt; - u32 nxt_offset = nxt->io->timeout.seq_offset; + u32 nxt_offset = nxt->timeout.count; if (nxt->flags & REQ_F_TIMEOUT_NOSEQ) continue; -- cgit v1.2.3-59-g8ed1b From 31af27c7cc9f675d93a135dca99e6413f9096f1d Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 15 Apr 2020 00:39:50 +0300 Subject: io_uring: don't count rqs failed after current one When checking for draining with __req_need_defer(), it tries to match how many requests were sent before a current one with number of already completed. Dropped SQEs are included in req->sequence, and they won't ever appear in CQ. To compensate for that, __req_need_defer() substracts ctx->cached_sq_dropped. However, what it should really use is number of SQEs dropped __before__ the current one. In other words, any submitted request shouldn't shouldn't affect dequeueing from the drain queue of previously submitted ones. Instead of saving proper ctx->cached_sq_dropped in each request, substract from req->sequence it at initialisation, so it includes number of properly submitted requests. note: it also changes behaviour of timeouts, but 1. it's already diverge from the description because of using SQ 2. the description is ambiguous regarding dropped SQEs Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/io_uring.c b/fs/io_uring.c index 3fc33ba4855d..381d50becd04 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -957,8 +957,8 @@ static inline bool __req_need_defer(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; - return req->sequence != ctx->cached_cq_tail + ctx->cached_sq_dropped - + atomic_read(&ctx->cached_cq_overflow); + return req->sequence != ctx->cached_cq_tail + + atomic_read(&ctx->cached_cq_overflow); } static inline bool req_need_defer(struct io_kiocb *req) @@ -5801,7 +5801,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, * it can be used to mark the position of the first IO in the * link list. */ - req->sequence = ctx->cached_sq_head; + req->sequence = ctx->cached_sq_head - ctx->cached_sq_dropped; req->opcode = READ_ONCE(sqe->opcode); req->user_data = READ_ONCE(sqe->user_data); req->io = NULL; -- cgit v1.2.3-59-g8ed1b