From 97c96e9fa36616d7890a6f3438172fc501927f01 Mon Sep 17 00:00:00 2001 From: Dylan Yudaken Date: Mon, 26 Sep 2022 10:09:26 -0700 Subject: io_uring: simplify __io_uring_add_tctx_node Remove submitter parameter from __io_uring_add_tctx_node. It was only called from one place, and we can do that logic in that one place. Signed-off-by: Dylan Yudaken Fixes: 97bbdc06a444 ("io_uring: add IORING_SETUP_SINGLE_ISSUER") Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 2 +- io_uring/tctx.c | 30 ++++++++++++++++++++---------- io_uring/tctx.h | 6 ++++-- 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 99a52f34b7d3..fe6ef64c873e 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -3355,7 +3355,7 @@ static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file) if (fd < 0) return fd; - ret = __io_uring_add_tctx_node(ctx, false); + ret = __io_uring_add_tctx_node(ctx); if (ret) { put_unused_fd(fd); return ret; diff --git a/io_uring/tctx.c b/io_uring/tctx.c index 7f97d97fef0a..dd0205fcdb13 100644 --- a/io_uring/tctx.c +++ b/io_uring/tctx.c @@ -105,18 +105,12 @@ static int io_register_submitter(struct io_ring_ctx *ctx) return ret; } -int __io_uring_add_tctx_node(struct io_ring_ctx *ctx, bool submitter) +int __io_uring_add_tctx_node(struct io_ring_ctx *ctx) { struct io_uring_task *tctx = current->io_uring; struct io_tctx_node *node; int ret; - if ((ctx->flags & IORING_SETUP_SINGLE_ISSUER) && submitter) { - ret = io_register_submitter(ctx); - if (ret) - return ret; - } - if (unlikely(!tctx)) { ret = io_uring_alloc_task_context(current, ctx); if (unlikely(ret)) @@ -150,8 +144,24 @@ int __io_uring_add_tctx_node(struct io_ring_ctx *ctx, bool submitter) list_add(&node->ctx_node, &ctx->tctx_list); mutex_unlock(&ctx->uring_lock); } - if (submitter) - tctx->last = ctx; + return 0; +} + +int __io_uring_add_tctx_node_from_submit(struct io_ring_ctx *ctx) +{ + int ret; + + if (ctx->flags & IORING_SETUP_SINGLE_ISSUER) { + ret = io_register_submitter(ctx); + if (ret) + return ret; + } + + ret = __io_uring_add_tctx_node(ctx); + if (ret) + return ret; + + current->io_uring->last = ctx; return 0; } @@ -259,7 +269,7 @@ int io_ringfd_register(struct io_ring_ctx *ctx, void __user *__arg, return -EINVAL; mutex_unlock(&ctx->uring_lock); - ret = __io_uring_add_tctx_node(ctx, false); + ret = __io_uring_add_tctx_node(ctx); mutex_lock(&ctx->uring_lock); if (ret) return ret; diff --git a/io_uring/tctx.h b/io_uring/tctx.h index 25974beed4d6..608e96de70a2 100644 --- a/io_uring/tctx.h +++ b/io_uring/tctx.h @@ -9,7 +9,8 @@ struct io_tctx_node { int io_uring_alloc_task_context(struct task_struct *task, struct io_ring_ctx *ctx); void io_uring_del_tctx_node(unsigned long index); -int __io_uring_add_tctx_node(struct io_ring_ctx *ctx, bool submitter); +int __io_uring_add_tctx_node(struct io_ring_ctx *ctx); +int __io_uring_add_tctx_node_from_submit(struct io_ring_ctx *ctx); void io_uring_clean_tctx(struct io_uring_task *tctx); void io_uring_unreg_ringfd(void); @@ -27,5 +28,6 @@ static inline int io_uring_add_tctx_node(struct io_ring_ctx *ctx) if (likely(tctx && tctx->last == ctx)) return 0; - return __io_uring_add_tctx_node(ctx, true); + + return __io_uring_add_tctx_node_from_submit(ctx); } -- cgit v1.2.3-59-g8ed1b From 4add705e4eebbdd919741de0548d7029c8c92b68 Mon Sep 17 00:00:00 2001 From: Dylan Yudaken Date: Mon, 26 Sep 2022 10:09:27 -0700 Subject: io_uring: remove io_register_submitter this is no longer needed, as submitter_task is set at creation time. Signed-off-by: Dylan Yudaken Fixes: 97bbdc06a444 ("io_uring: add IORING_SETUP_SINGLE_ISSUER") Signed-off-by: Jens Axboe --- io_uring/tctx.c | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/io_uring/tctx.c b/io_uring/tctx.c index dd0205fcdb13..4324b1cf1f6a 100644 --- a/io_uring/tctx.c +++ b/io_uring/tctx.c @@ -91,20 +91,6 @@ __cold int io_uring_alloc_task_context(struct task_struct *task, return 0; } -static int io_register_submitter(struct io_ring_ctx *ctx) -{ - int ret = 0; - - mutex_lock(&ctx->uring_lock); - if (!ctx->submitter_task) - ctx->submitter_task = get_task_struct(current); - else if (ctx->submitter_task != current) - ret = -EEXIST; - mutex_unlock(&ctx->uring_lock); - - return ret; -} - int __io_uring_add_tctx_node(struct io_ring_ctx *ctx) { struct io_uring_task *tctx = current->io_uring; @@ -151,11 +137,9 @@ int __io_uring_add_tctx_node_from_submit(struct io_ring_ctx *ctx) { int ret; - if (ctx->flags & IORING_SETUP_SINGLE_ISSUER) { - ret = io_register_submitter(ctx); - if (ret) - return ret; - } + if (ctx->flags & IORING_SETUP_SINGLE_ISSUER + && ctx->submitter_task != current) + return -EEXIST; ret = __io_uring_add_tctx_node(ctx); if (ret) -- cgit v1.2.3-59-g8ed1b From d7cce96c449e35bbfd41e830b341b95973891eed Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 27 Sep 2022 01:13:30 +0100 Subject: io_uring: limit registration w/ SINGLE_ISSUER IORING_SETUP_SINGLE_ISSUER restricts what tasks can submit requests. Extend it to registration as well, so non-owning task can't do registrations. It's not necessary at the moment but might be useful in the future. Cc: # 6.0 Fixes: 97bbdc06a444 ("io_uring: add IORING_SETUP_SINGLE_ISSUER") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/f52a6a9c8a8990d4a831f73c0571e7406aac2bba.1664237592.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index fe6ef64c873e..63f6ce5e5355 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -3890,6 +3890,9 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, if (WARN_ON_ONCE(percpu_ref_is_dying(&ctx->refs))) return -ENXIO; + if (ctx->submitter_task && ctx->submitter_task != current) + return -EEXIST; + if (ctx->restricted) { if (opcode >= IORING_REGISTER_LAST) return -EINVAL; -- cgit v1.2.3-59-g8ed1b From 0091bfc81741b8d3aeb3b7ab8636f911b2de6e80 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 3 Oct 2022 13:59:47 +0100 Subject: io_uring/af_unix: defer registered files gc to io_uring release Instead of putting io_uring's registered files in unix_gc() we want it to be done by io_uring itself. The trick here is to consider io_uring registered files for cycle detection but not actually putting them down. Because io_uring can't register other ring instances, this will remove all refs to the ring file triggering the ->release path and clean up with io_ring_ctx_free(). Cc: stable@vger.kernel.org Fixes: 6b06314c47e1 ("io_uring: add file set registration") Reported-and-tested-by: David Bouman Signed-off-by: Pavel Begunkov Signed-off-by: Thadeu Lima de Souza Cascardo [axboe: add kerneldoc comment to skb, fold in skb leak fix] Signed-off-by: Jens Axboe --- include/linux/skbuff.h | 2 ++ io_uring/rsrc.c | 1 + net/unix/garbage.c | 20 ++++++++++++++++++++ 3 files changed, 23 insertions(+) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9fcf534f2d92..7be5bb4c94b6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -803,6 +803,7 @@ typedef unsigned char *sk_buff_data_t; * @csum_level: indicates the number of consecutive checksums found in * the packet minus one that have been verified as * CHECKSUM_UNNECESSARY (max 3) + * @scm_io_uring: SKB holds io_uring registered files * @dst_pending_confirm: need to confirm neighbour * @decrypted: Decrypted SKB * @slow_gro: state present at GRO time, slower prepare step required @@ -982,6 +983,7 @@ struct sk_buff { #endif __u8 slow_gro:1; __u8 csum_not_inet:1; + __u8 scm_io_uring:1; #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 6f88ded0e7e5..012fdb04ec23 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -855,6 +855,7 @@ int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file) UNIXCB(skb).fp = fpl; skb->sk = sk; + skb->scm_io_uring = 1; skb->destructor = unix_destruct_scm; refcount_add(skb->truesize, &sk->sk_wmem_alloc); } diff --git a/net/unix/garbage.c b/net/unix/garbage.c index d45d5366115a..dc2763540393 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -204,6 +204,7 @@ void wait_for_unix_gc(void) /* The external entry point: unix_gc() */ void unix_gc(void) { + struct sk_buff *next_skb, *skb; struct unix_sock *u; struct unix_sock *next; struct sk_buff_head hitlist; @@ -297,11 +298,30 @@ void unix_gc(void) spin_unlock(&unix_gc_lock); + /* We need io_uring to clean its registered files, ignore all io_uring + * originated skbs. It's fine as io_uring doesn't keep references to + * other io_uring instances and so killing all other files in the cycle + * will put all io_uring references forcing it to go through normal + * release.path eventually putting registered files. + */ + skb_queue_walk_safe(&hitlist, skb, next_skb) { + if (skb->scm_io_uring) { + __skb_unlink(skb, &hitlist); + skb_queue_tail(&skb->sk->sk_receive_queue, skb); + } + } + /* Here we are. Hitlist is filled. Die. */ __skb_queue_purge(&hitlist); spin_lock(&unix_gc_lock); + /* There could be io_uring registered files, just push them back to + * the inflight list + */ + list_for_each_entry_safe(u, next, &gc_candidates, link) + list_move_tail(&u->link, &gc_inflight_list); + /* All candidates should have been detached by now. */ BUG_ON(!list_empty(&gc_candidates)); -- cgit v1.2.3-59-g8ed1b From 42b6419d0aba47c5d8644cdc0b68502254671de5 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 4 Oct 2022 03:19:08 +0100 Subject: io_uring: correct pinned_vm accounting ->mm_account should be released only after we free all registered buffers, otherwise __io_sqe_buffers_unregister() will see a NULL ->mm_account and skip locked_vm accounting. Cc: Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/6d798f65ed4ab8db3664c4d3397d4af16ca98846.1664849932.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 63f6ce5e5355..ea5cee593bbd 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2585,12 +2585,6 @@ static void io_req_caches_free(struct io_ring_ctx *ctx) static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) { io_sq_thread_finish(ctx); - - if (ctx->mm_account) { - mmdrop(ctx->mm_account); - ctx->mm_account = NULL; - } - io_rsrc_refs_drop(ctx); /* __io_rsrc_put_work() may need uring_lock to progress, wait w/o it */ io_wait_rsrc_data(ctx->buf_data); @@ -2633,6 +2627,10 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list)); WARN_ON_ONCE(ctx->notif_slots || ctx->nr_notif_slots); + if (ctx->mm_account) { + mmdrop(ctx->mm_account); + ctx->mm_account = NULL; + } io_mem_free(ctx->rings); io_mem_free(ctx->sq_sqes); -- cgit v1.2.3-59-g8ed1b From b7a817752efc850603c4c23ed78da2b990a6a34a Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 4 Oct 2022 03:19:25 +0100 Subject: io_uring: remove notif leftovers Notifications were killed but there is a couple of fields and struct declarations left, remove them. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/8df8877d677be5a2b43afd936d600e60105ea960.1664849941.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 5 ----- io_uring/io_uring.c | 1 - 2 files changed, 6 deletions(-) diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index aa4d90a53866..f5b687a787a3 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -34,9 +34,6 @@ struct io_file_table { unsigned int alloc_hint; }; -struct io_notif; -struct io_notif_slot; - struct io_hash_bucket { spinlock_t lock; struct hlist_head list; @@ -242,8 +239,6 @@ struct io_ring_ctx { unsigned nr_user_files; unsigned nr_user_bufs; struct io_mapped_ubuf **user_bufs; - struct io_notif_slot *notif_slots; - unsigned nr_notif_slots; struct io_submit_state submit_state; diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index ea5cee593bbd..b12ec6b5a464 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2625,7 +2625,6 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) } #endif WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list)); - WARN_ON_ONCE(ctx->notif_slots || ctx->nr_notif_slots); if (ctx->mm_account) { mmdrop(ctx->mm_account); -- cgit v1.2.3-59-g8ed1b From 3fb1bd68817288729179444caf1fd5c5c4d2d65d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 4 Oct 2022 20:29:48 -0600 Subject: io_uring/net: handle -EINPROGRESS correct for IORING_OP_CONNECT We treat EINPROGRESS like EAGAIN, but if we're retrying post getting EINPROGRESS, then we just need to check the socket for errors and terminate the request. This was exposed on a bluetooth connection request which ends up taking a while and hitting EINPROGRESS, and yields a CQE result of -EBADFD because we're retrying a connect on a socket that is now connected. Cc: stable@vger.kernel.org Fixes: 87f80d623c6c ("io_uring: handle connect -EINPROGRESS like -EAGAIN") Link: https://github.com/axboe/liburing/issues/671 Reported-by: Aidan Sun Signed-off-by: Jens Axboe --- io_uring/net.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index caa6a803cb72..8c7226b5bf41 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -46,6 +46,7 @@ struct io_connect { struct file *file; struct sockaddr __user *addr; int addr_len; + bool in_progress; }; struct io_sr_msg { @@ -1386,6 +1387,7 @@ int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); conn->addr_len = READ_ONCE(sqe->addr2); + conn->in_progress = false; return 0; } @@ -1397,6 +1399,16 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags) int ret; bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; + if (connect->in_progress) { + struct socket *socket; + + ret = -ENOTSOCK; + socket = sock_from_file(req->file); + if (socket) + ret = sock_error(socket->sk); + goto out; + } + if (req_has_async_data(req)) { io = req->async_data; } else { @@ -1413,13 +1425,17 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags) ret = __sys_connect_file(req->file, &io->address, connect->addr_len, file_flags); if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) { - if (req_has_async_data(req)) - return -EAGAIN; - if (io_alloc_async_data(req)) { - ret = -ENOMEM; - goto out; + if (ret == -EINPROGRESS) { + connect->in_progress = true; + } else { + if (req_has_async_data(req)) + return -EAGAIN; + if (io_alloc_async_data(req)) { + ret = -ENOMEM; + goto out; + } + memcpy(req->async_data, &__io, sizeof(__io)); } - memcpy(req->async_data, &__io, sizeof(__io)); return -EAGAIN; } if (ret == -ERESTARTSYS) -- cgit v1.2.3-59-g8ed1b From fc86f9d3bb4904117eea70347d323fde34a47c79 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 6 Oct 2022 02:06:10 +0100 Subject: io_uring: remove redundant memory barrier in io_req_local_work_add io_cqring_wake() needs a barrier for the waitqueue_active() check. However, in the case of io_req_local_work_add(), we call llist_add() first, which implies an atomic. Hence we can replace smb_mb() with smp_mb__after_atomic(). Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/43983bc8bc507172adda7a0f00cab1aff09fd238.1665018309.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 5 +++-- io_uring/io_uring.h | 11 +++++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index b12ec6b5a464..12870cd7cb07 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1106,6 +1106,8 @@ static void io_req_local_work_add(struct io_kiocb *req) if (!llist_add(&req->io_task_work.node, &ctx->work_llist)) return; + /* need it for the following io_cqring_wake() */ + smp_mb__after_atomic(); if (unlikely(atomic_read(&req->task->io_uring->in_idle))) { io_move_task_work_from_local(ctx); @@ -1117,8 +1119,7 @@ static void io_req_local_work_add(struct io_kiocb *req) if (ctx->has_evfd) io_eventfd_signal(ctx); - io_cqring_wake(ctx); - + __io_cqring_wake(ctx); } static inline void __io_req_task_work_add(struct io_kiocb *req, bool allow_local) diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index 48ce2348c8c1..47d4cad1e9c4 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -203,17 +203,24 @@ static inline void io_commit_cqring(struct io_ring_ctx *ctx) smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail); } -static inline void io_cqring_wake(struct io_ring_ctx *ctx) +/* requires smb_mb() prior, see wq_has_sleeper() */ +static inline void __io_cqring_wake(struct io_ring_ctx *ctx) { /* * wake_up_all() may seem excessive, but io_wake_function() and * io_should_wake() handle the termination of the loop and only * wake as many waiters as we need to. */ - if (wq_has_sleeper(&ctx->cq_wait)) + if (waitqueue_active(&ctx->cq_wait)) wake_up_all(&ctx->cq_wait); } +static inline void io_cqring_wake(struct io_ring_ctx *ctx) +{ + smp_mb(); + __io_cqring_wake(ctx); +} + static inline bool io_sqring_full(struct io_ring_ctx *ctx) { struct io_rings *r = ctx->rings; -- cgit v1.2.3-59-g8ed1b From 44f87745d5f24a3cdf0548bf1d84fbb7316ce229 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 6 Oct 2022 21:42:33 +0100 Subject: io_uring: optimise locking for local tw with submit_wait Running local task_work requires taking uring_lock, for submit + wait we can try to run them right after submit while we still hold the lock and save one lock/unlokc pair. The optimisation was implemented in the first local tw patches but got dropped for simplicity. Suggested-by: Dylan Yudaken Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/281fc79d98b5d91fe4778c5137a17a2ab4693e5c.1665088876.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 12 ++++++++++-- io_uring/io_uring.h | 7 +++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 12870cd7cb07..de08d9902b30 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -3227,8 +3227,16 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, mutex_unlock(&ctx->uring_lock); goto out; } - if ((flags & IORING_ENTER_GETEVENTS) && ctx->syscall_iopoll) - goto iopoll_locked; + if (flags & IORING_ENTER_GETEVENTS) { + if (ctx->syscall_iopoll) + goto iopoll_locked; + /* + * Ignore errors, we'll soon call io_cqring_wait() and + * it should handle ownership problems if any. + */ + if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) + (void)io_run_local_work_locked(ctx); + } mutex_unlock(&ctx->uring_lock); } diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index 47d4cad1e9c4..ef77d2aa3172 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -275,6 +275,13 @@ static inline int io_run_task_work_ctx(struct io_ring_ctx *ctx) return ret; } +static inline int io_run_local_work_locked(struct io_ring_ctx *ctx) +{ + if (llist_empty(&ctx->work_llist)) + return 0; + return __io_run_local_work(ctx, true); +} + static inline void io_tw_lock(struct io_ring_ctx *ctx, bool *locked) { if (!*locked) { -- cgit v1.2.3-59-g8ed1b From 11528491c65a493050c682786c6b7cfd9e9b4a8f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 7 Oct 2022 12:26:02 -0600 Subject: io_uring/opdef: remove 'audit_skip' from SENDMSG_ZC The msg variants of sending aren't audited separately, so we should not be setting audit_skip for the zerocopy sendmsg variant either. Fixes: 493108d95f14 ("io_uring/net: zerocopy sendmsg") Reported-by: Paul Moore Reviewed-by: Paul Moore Signed-off-by: Jens Axboe --- io_uring/opdef.c | 1 - 1 file changed, 1 deletion(-) diff --git a/io_uring/opdef.c b/io_uring/opdef.c index 2330f6da791e..83dc0f9ad3b2 100644 --- a/io_uring/opdef.c +++ b/io_uring/opdef.c @@ -510,7 +510,6 @@ const struct io_op_def io_op_defs[] = { .needs_file = 1, .unbound_nonreg_file = 1, .pollout = 1, - .audit_skip = 1, .ioprio = 1, .manual_alloc = 1, #if defined(CONFIG_NET) -- cgit v1.2.3-59-g8ed1b From c86416c6ff5ba7f7e5f3ff1dd8a9d1b3d0be827c Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Mon, 10 Oct 2022 16:43:30 -0700 Subject: io_uring: local variable rw shadows outer variable in io_write This fixes the shadowing of the outer variable rw in the function io_write(). No issue is caused by this, but let's silence the shadowing warning anyway. Reported-by: kernel test robot Signed-off-by: Stefan Roesch Link: https://lore.kernel.org/r/20221010234330.244244-1-shr@devkernel.io Signed-off-by: Jens Axboe --- io_uring/rw.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/io_uring/rw.c b/io_uring/rw.c index a25cd44cd415..453e0ae92160 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -916,7 +916,7 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags) goto copy_iov; if (ret2 != req->cqe.res && ret2 >= 0 && need_complete_io(req)) { - struct io_async_rw *rw; + struct io_async_rw *io; trace_io_uring_short_write(req->ctx, kiocb->ki_pos - ret2, req->cqe.res, ret2); @@ -929,9 +929,9 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags) iov_iter_save_state(&s->iter, &s->iter_state); ret = io_setup_async_rw(req, iovec, s, true); - rw = req->async_data; - if (rw) - rw->bytes_done += ret2; + io = req->async_data; + if (io) + io->bytes_done += ret2; if (kiocb->ki_flags & IOCB_WRITE) kiocb_end_write(req); -- cgit v1.2.3-59-g8ed1b From 00927931cb630bbf8edb6d7f4dadb25139fc5e16 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 11 Oct 2022 01:59:57 +0100 Subject: io_uring: fix fdinfo sqe offsets calculation Only with the big sqe feature they take 128 bytes per entry, but we unconditionally advance by 128B. Fix it by using sq_shift. Fixes: 3b8fdd1dc35e3 ("io_uring/fdinfo: fix sqe dumping for IORING_SETUP_SQE128") Reported-and-tested-by: syzbot+e5198737e8a2d23d958c@syzkaller.appspotmail.com Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/8b41287cb75d5efb8fcb5cccde845ddbbadd8372.1665449983.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/fdinfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index 4eae088046d0..2e04850a657b 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -94,7 +94,7 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, sq_idx = READ_ONCE(ctx->sq_array[entry & sq_mask]); if (sq_idx > sq_mask) continue; - sqe = &ctx->sq_sqes[sq_idx << 1]; + sqe = &ctx->sq_sqes[sq_idx << sq_shift]; seq_printf(m, "%5u: opcode:%s, fd:%d, flags:%x, off:%llu, " "addr:0x%llx, rw_flags:0x%x, buf_index:%d " "user_data:%llu", -- cgit v1.2.3-59-g8ed1b From 2ec33a6c3cca9fe2465e82050c81f5ffdc508b36 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 11 Oct 2022 09:06:23 -0600 Subject: io_uring/rw: ensure kiocb_end_write() is always called A previous commit moved the notifications and end-write handling, but it is now missing a few spots where we also want to call both of those. Without that, we can potentially be missing file notifications, and more importantly, have an imbalance in the super_block writers sem accounting. Fixes: b000145e9907 ("io_uring/rw: defer fsnotify calls to task context") Reported-by: Dave Chinner Link: https://lore.kernel.org/all/20221010050319.GC2703033@dread.disaster.area/ Signed-off-by: Jens Axboe --- io_uring/rw.c | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/io_uring/rw.c b/io_uring/rw.c index 453e0ae92160..100de2626e47 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -234,11 +234,34 @@ static void kiocb_end_write(struct io_kiocb *req) } } +/* + * Trigger the notifications after having done some IO, and finish the write + * accounting, if any. + */ +static void io_req_io_end(struct io_kiocb *req) +{ + struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); + + WARN_ON(!in_task()); + + if (rw->kiocb.ki_flags & IOCB_WRITE) { + kiocb_end_write(req); + fsnotify_modify(req->file); + } else { + fsnotify_access(req->file); + } +} + static bool __io_complete_rw_common(struct io_kiocb *req, long res) { if (unlikely(res != req->cqe.res)) { if ((res == -EAGAIN || res == -EOPNOTSUPP) && io_rw_should_reissue(req)) { + /* + * Reissue will start accounting again, finish the + * current cycle. + */ + io_req_io_end(req); req->flags |= REQ_F_REISSUE | REQ_F_PARTIAL_IO; return true; } @@ -264,15 +287,7 @@ static inline int io_fixup_rw_res(struct io_kiocb *req, long res) static void io_req_rw_complete(struct io_kiocb *req, bool *locked) { - struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); - - if (rw->kiocb.ki_flags & IOCB_WRITE) { - kiocb_end_write(req); - fsnotify_modify(req->file); - } else { - fsnotify_access(req->file); - } - + io_req_io_end(req); io_req_task_complete(req, locked); } @@ -317,6 +332,11 @@ static int kiocb_done(struct io_kiocb *req, ssize_t ret, req->file->f_pos = rw->kiocb.ki_pos; if (ret >= 0 && (rw->kiocb.ki_complete == io_complete_rw)) { if (!__io_complete_rw_common(req, ret)) { + /* + * Safe to call io_end from here as we're inline + * from the submission path. + */ + io_req_io_end(req); io_req_set_res(req, final_ret, io_put_kbuf(req, issue_flags)); return IOU_OK; -- cgit v1.2.3-59-g8ed1b