aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/io_uring/io_uring.c
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2024-06-03 11:19:10 -0600
committerJens Axboe <axboe@kernel.dk>2024-06-16 14:54:55 -0600
commit60b6c075e8eb8bd23c106e2ab13370a146a94a5b (patch)
treec0e9290454a312d572705bbb9f6d242a5ba32605 /io_uring/io_uring.c
parentio_uring/rsrc: Drop io_copy_iov in favor of iovec API (diff)
downloadwireguard-linux-60b6c075e8eb8bd23c106e2ab13370a146a94a5b.tar.xz
wireguard-linux-60b6c075e8eb8bd23c106e2ab13370a146a94a5b.zip
io_uring/eventfd: move to more idiomatic RCU free usage
In some ways, it just "happens to work" currently with using the ops field for both the free and signaling bit. But it depends on ordering of operations in terms of freeing and signaling. Clean it up and use the usual refs == 0 under RCU read side lock to determine if the ev_fd is still valid, and use the reference to gate the freeing as well. Fixes: 21a091b970cd ("io_uring: signal registered eventfd to process deferred task work") Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'io_uring/io_uring.c')
-rw-r--r--io_uring/io_uring.c49
1 files changed, 26 insertions, 23 deletions
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 154b25b8a613..0a24feec27f7 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -541,29 +541,33 @@ static __cold void io_queue_deferred(struct io_ring_ctx *ctx)
}
}
-void io_eventfd_ops(struct rcu_head *rcu)
+void io_eventfd_free(struct rcu_head *rcu)
{
struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
- int ops = atomic_xchg(&ev_fd->ops, 0);
- if (ops & BIT(IO_EVENTFD_OP_SIGNAL_BIT))
- eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
+ eventfd_ctx_put(ev_fd->cq_ev_fd);
+ kfree(ev_fd);
+}
- /* IO_EVENTFD_OP_FREE_BIT may not be set here depending on callback
- * ordering in a race but if references are 0 we know we have to free
- * it regardless.
- */
- if (atomic_dec_and_test(&ev_fd->refs)) {
- eventfd_ctx_put(ev_fd->cq_ev_fd);
- kfree(ev_fd);
- }
+void io_eventfd_do_signal(struct rcu_head *rcu)
+{
+ struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
+
+ eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
+
+ if (atomic_dec_and_test(&ev_fd->refs))
+ io_eventfd_free(rcu);
}
static void io_eventfd_signal(struct io_ring_ctx *ctx)
{
struct io_ev_fd *ev_fd = NULL;
- rcu_read_lock();
+ if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
+ return;
+
+ guard(rcu)();
+
/*
* rcu_dereference ctx->io_ev_fd once and use it for both for checking
* and eventfd_signal
@@ -576,24 +580,23 @@ static void io_eventfd_signal(struct io_ring_ctx *ctx)
* the function and rcu_read_lock.
*/
if (unlikely(!ev_fd))
- goto out;
- if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
- goto out;
+ return;
+ if (!atomic_inc_not_zero(&ev_fd->refs))
+ return;
if (ev_fd->eventfd_async && !io_wq_current_is_worker())
goto out;
if (likely(eventfd_signal_allowed())) {
eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE);
} else {
- atomic_inc(&ev_fd->refs);
- if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops))
- call_rcu_hurry(&ev_fd->rcu, io_eventfd_ops);
- else
- atomic_dec(&ev_fd->refs);
+ if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) {
+ call_rcu_hurry(&ev_fd->rcu, io_eventfd_do_signal);
+ return;
+ }
}
-
out:
- rcu_read_unlock();
+ if (atomic_dec_and_test(&ev_fd->refs))
+ call_rcu(&ev_fd->rcu, io_eventfd_free);
}
static void io_eventfd_flush_signal(struct io_ring_ctx *ctx)