From 4a3223f7bfda14c532856152b12aace525cf8079 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 5 Mar 2024 15:39:16 -0700 Subject: io_uring/net: switch io_recv() to using io_async_msghdr No functional changes in this patch, just in preparation for carrying more state than what is available now, if necessary. Signed-off-by: Jens Axboe --- io_uring/net.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'io_uring/net.h') diff --git a/io_uring/net.h b/io_uring/net.h index 191009979bcb..5c1230f1aaf9 100644 --- a/io_uring/net.h +++ b/io_uring/net.h @@ -40,7 +40,7 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags); int io_send(struct io_kiocb *req, unsigned int issue_flags); -int io_send_prep_async(struct io_kiocb *req); +int io_sendrecv_prep_async(struct io_kiocb *req); int io_recvmsg_prep_async(struct io_kiocb *req); int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); -- cgit v1.2.3-59-g8ed1b From c6f32c7d9e09bf1368447e9a29e869193ecbb756 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 18 Mar 2024 07:36:03 -0600 Subject: io_uring/net: get rid of ->prep_async() for receive side Move the io_async_msghdr out of the issue path and into prep handling, since it's now done unconditionally and hence does not need to be part of the issue path. This reduces the footprint of the multishot fast path of multiple invocations of ->issue() per prep, and also means that using ->prep_async() can be dropped for recvmsg as this is now done via setup on the prep side. 
Signed-off-by: Jens Axboe --- io_uring/net.c | 71 ++++++++++++++++++++++---------------------------------- io_uring/net.h | 1 - io_uring/opdef.c | 2 -- 3 files changed, 28 insertions(+), 46 deletions(-) (limited to 'io_uring/net.h') diff --git a/io_uring/net.c b/io_uring/net.c index b08c0ae5951a..7cd93cd8b8c4 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -595,17 +595,36 @@ static int io_recvmsg_copy_hdr(struct io_kiocb *req, msg.msg_controllen); } -int io_recvmsg_prep_async(struct io_kiocb *req) +static int io_recvmsg_prep_setup(struct io_kiocb *req) { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); - struct io_async_msghdr *iomsg; + struct io_async_msghdr *kmsg; int ret; - sr->done_io = 0; - if (!io_msg_alloc_async_prep(req)) + /* always locked for prep */ + kmsg = io_msg_alloc_async(req, 0); + if (unlikely(!kmsg)) return -ENOMEM; - iomsg = req->async_data; - ret = io_recvmsg_copy_hdr(req, iomsg); + + if (req->opcode == IORING_OP_RECV) { + kmsg->msg.msg_name = NULL; + kmsg->msg.msg_namelen = 0; + kmsg->msg.msg_control = NULL; + kmsg->msg.msg_get_inq = 1; + kmsg->msg.msg_controllen = 0; + kmsg->msg.msg_iocb = NULL; + kmsg->msg.msg_ubuf = NULL; + + if (!io_do_buffer_select(req)) { + ret = import_ubuf(ITER_DEST, sr->buf, sr->len, + &kmsg->msg.msg_iter); + if (unlikely(ret)) + return ret; + } + return 0; + } + + ret = io_recvmsg_copy_hdr(req, kmsg); if (!ret) req->flags |= REQ_F_NEED_CLEANUP; return ret; @@ -656,7 +675,7 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) sr->msg_flags |= MSG_CMSG_COMPAT; #endif sr->nr_multishot_loops = 0; - return 0; + return io_recvmsg_prep_setup(req); } static inline void io_recv_prep_retry(struct io_kiocb *req, @@ -814,7 +833,7 @@ static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io, int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); - struct io_async_msghdr *kmsg; + struct io_async_msghdr 
*kmsg = req->async_data; struct socket *sock; unsigned flags; int ret, min_ret = 0; @@ -825,17 +844,6 @@ int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) if (unlikely(!sock)) return -ENOTSOCK; - if (req_has_async_data(req)) { - kmsg = req->async_data; - } else { - kmsg = io_msg_alloc_async(req, issue_flags); - if (unlikely(!kmsg)) - return -ENOMEM; - ret = io_recvmsg_copy_hdr(req, kmsg); - if (ret) - return ret; - } - if (!(req->flags & REQ_F_POLLED) && (sr->flags & IORING_RECVSEND_POLL_FIRST)) return -EAGAIN; @@ -914,36 +922,13 @@ retry_multishot: int io_recv(struct io_kiocb *req, unsigned int issue_flags) { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); - struct io_async_msghdr *kmsg; + struct io_async_msghdr *kmsg = req->async_data; struct socket *sock; unsigned flags; int ret, min_ret = 0; bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; size_t len = sr->len; - if (req_has_async_data(req)) { - kmsg = req->async_data; - } else { - kmsg = io_msg_alloc_async(req, issue_flags); - if (unlikely(!kmsg)) - return -ENOMEM; - kmsg->free_iov = NULL; - kmsg->msg.msg_name = NULL; - kmsg->msg.msg_namelen = 0; - kmsg->msg.msg_control = NULL; - kmsg->msg.msg_get_inq = 1; - kmsg->msg.msg_controllen = 0; - kmsg->msg.msg_iocb = NULL; - kmsg->msg.msg_ubuf = NULL; - - if (!io_do_buffer_select(req)) { - ret = import_ubuf(ITER_DEST, sr->buf, sr->len, - &kmsg->msg.msg_iter); - if (unlikely(ret)) - return ret; - } - } - if (!(req->flags & REQ_F_POLLED) && (sr->flags & IORING_RECVSEND_POLL_FIRST)) return -EAGAIN; diff --git a/io_uring/net.h b/io_uring/net.h index 5c1230f1aaf9..4b4fd9b1b7b4 100644 --- a/io_uring/net.h +++ b/io_uring/net.h @@ -42,7 +42,6 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags); int io_send(struct io_kiocb *req, unsigned int issue_flags); int io_sendrecv_prep_async(struct io_kiocb *req); -int io_recvmsg_prep_async(struct io_kiocb *req); int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); 
int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags); int io_recv(struct io_kiocb *req, unsigned int issue_flags); diff --git a/io_uring/opdef.c b/io_uring/opdef.c index 77131826d603..1368193edc57 100644 --- a/io_uring/opdef.c +++ b/io_uring/opdef.c @@ -536,7 +536,6 @@ const struct io_cold_def io_cold_defs[] = { .name = "RECVMSG", #if defined(CONFIG_NET) .async_size = sizeof(struct io_async_msghdr), - .prep_async = io_recvmsg_prep_async, .cleanup = io_sendmsg_recvmsg_cleanup, .fail = io_sendrecv_fail, #endif @@ -613,7 +612,6 @@ const struct io_cold_def io_cold_defs[] = { .async_size = sizeof(struct io_async_msghdr), .cleanup = io_sendmsg_recvmsg_cleanup, .fail = io_sendrecv_fail, - .prep_async = io_sendrecv_prep_async, #endif }, [IORING_OP_OPENAT2] = { -- cgit v1.2.3-59-g8ed1b From 50220d6ac8ff31eb065fba818e960f549fb89d4d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 18 Mar 2024 08:09:47 -0600 Subject: io_uring/net: get rid of ->prep_async() for send side Move the io_async_msghdr out of the issue path and into prep handling, since it's now done unconditionally and hence does not need to be part of the issue path. This means any usage of io_sendrecv_prep_async() and io_sendmsg_prep_async(), and hence the forced async setup path, is now unified with the normal prep setup. 
Signed-off-by: Jens Axboe --- io_uring/net.c | 154 +++++++++++++++++-------------------------------------- io_uring/net.h | 2 - io_uring/opdef.c | 4 -- 3 files changed, 46 insertions(+), 114 deletions(-) (limited to 'io_uring/net.h') diff --git a/io_uring/net.c b/io_uring/net.c index 7cd93cd8b8c4..a57b15e91ad9 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -290,50 +290,56 @@ static int io_sendmsg_copy_hdr(struct io_kiocb *req, return ret; } -int io_sendrecv_prep_async(struct io_kiocb *req) +void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req) +{ + struct io_async_msghdr *io = req->async_data; + + kfree(io->free_iov); +} + +static int io_send_setup(struct io_kiocb *req) { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); - struct io_async_msghdr *io; + struct io_async_msghdr *kmsg = req->async_data; int ret; - if (req_has_async_data(req)) - return 0; - sr->done_io = 0; - if (!sr->addr) - return 0; - io = io_msg_alloc_async_prep(req); - if (!io) - return -ENOMEM; - memset(&io->msg, 0, sizeof(io->msg)); - ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, &io->msg.msg_iter); - if (unlikely(ret)) + kmsg->msg.msg_name = NULL; + kmsg->msg.msg_namelen = 0; + kmsg->msg.msg_control = NULL; + kmsg->msg.msg_controllen = 0; + kmsg->msg.msg_ubuf = NULL; + + if (sr->addr) { + ret = move_addr_to_kernel(sr->addr, sr->addr_len, &kmsg->addr); + if (unlikely(ret < 0)) + return ret; + kmsg->msg.msg_name = &kmsg->addr; + kmsg->msg.msg_namelen = sr->addr_len; + } + ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter); + if (unlikely(ret < 0)) return ret; - io->msg.msg_name = &io->addr; - io->msg.msg_namelen = sr->addr_len; - return move_addr_to_kernel(sr->addr, sr->addr_len, &io->addr); + + return 0; } -int io_sendmsg_prep_async(struct io_kiocb *req) +static int io_sendmsg_prep_setup(struct io_kiocb *req, int is_msg) { - struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); + struct io_async_msghdr *kmsg; int ret; - sr->done_io = 0; - if 
(!io_msg_alloc_async_prep(req)) + /* always locked for prep */ + kmsg = io_msg_alloc_async(req, 0); + if (unlikely(!kmsg)) return -ENOMEM; - ret = io_sendmsg_copy_hdr(req, req->async_data); + if (!is_msg) + return io_send_setup(req); + ret = io_sendmsg_copy_hdr(req, kmsg); if (!ret) req->flags |= REQ_F_NEED_CLEANUP; return ret; } -void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req) -{ - struct io_async_msghdr *io = req->async_data; - - kfree(io->free_iov); -} - int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); @@ -362,7 +368,7 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (req->ctx->compat) sr->msg_flags |= MSG_CMSG_COMPAT; #endif - return 0; + return io_sendmsg_prep_setup(req, req->opcode == IORING_OP_SENDMSG); } static void io_req_msg_cleanup(struct io_kiocb *req, @@ -379,7 +385,7 @@ static void io_req_msg_cleanup(struct io_kiocb *req, int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); - struct io_async_msghdr *kmsg; + struct io_async_msghdr *kmsg = req->async_data; struct socket *sock; unsigned flags; int min_ret = 0; @@ -389,17 +395,6 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) if (unlikely(!sock)) return -ENOTSOCK; - if (req_has_async_data(req)) { - kmsg = req->async_data; - } else { - kmsg = io_msg_alloc_async(req, issue_flags); - if (unlikely(!kmsg)) - return -ENOMEM; - ret = io_sendmsg_copy_hdr(req, kmsg); - if (ret) - return ret; - } - if (!(req->flags & REQ_F_POLLED) && (sr->flags & IORING_RECVSEND_POLL_FIRST)) return -EAGAIN; @@ -437,52 +432,10 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) return IOU_OK; } -static struct io_async_msghdr *io_send_setup(struct io_kiocb *req, - unsigned int issue_flags) -{ - struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); - struct io_async_msghdr *kmsg; - int ret; - - 
if (req_has_async_data(req)) { - kmsg = req->async_data; - } else { - kmsg = io_msg_alloc_async(req, issue_flags); - if (unlikely(!kmsg)) - return ERR_PTR(-ENOMEM); - kmsg->msg.msg_name = NULL; - kmsg->msg.msg_namelen = 0; - kmsg->msg.msg_control = NULL; - kmsg->msg.msg_controllen = 0; - kmsg->msg.msg_ubuf = NULL; - - if (sr->addr) { - ret = move_addr_to_kernel(sr->addr, sr->addr_len, - &kmsg->addr); - if (unlikely(ret < 0)) - return ERR_PTR(ret); - kmsg->msg.msg_name = &kmsg->addr; - kmsg->msg.msg_namelen = sr->addr_len; - } - - ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, - &kmsg->msg.msg_iter); - if (unlikely(ret)) - return ERR_PTR(ret); - } - - if (!(req->flags & REQ_F_POLLED) && - (sr->flags & IORING_RECVSEND_POLL_FIRST)) - return ERR_PTR(-EAGAIN); - - return kmsg; -} - int io_send(struct io_kiocb *req, unsigned int issue_flags) { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); - struct io_async_msghdr *kmsg; - size_t len = sr->len; + struct io_async_msghdr *kmsg = req->async_data; struct socket *sock; unsigned flags; int min_ret = 0; @@ -492,13 +445,9 @@ int io_send(struct io_kiocb *req, unsigned int issue_flags) if (unlikely(!sock)) return -ENOTSOCK; - kmsg = io_send_setup(req, issue_flags); - if (IS_ERR(kmsg)) - return PTR_ERR(kmsg); - - ret = import_ubuf(ITER_SOURCE, sr->buf, len, &kmsg->msg.msg_iter); - if (unlikely(ret)) - return ret; + if (!(req->flags & REQ_F_POLLED) && + (sr->flags & IORING_RECVSEND_POLL_FIRST)) + return -EAGAIN; flags = sr->msg_flags; if (issue_flags & IO_URING_F_NONBLOCK) @@ -1084,7 +1033,7 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (req->ctx->compat) zc->msg_flags |= MSG_CMSG_COMPAT; #endif - return 0; + return io_sendmsg_prep_setup(req, req->opcode == IORING_OP_SENDMSG_ZC); } static int io_sg_from_iter_iovec(struct sock *sk, struct sk_buff *skb, @@ -1173,7 +1122,7 @@ static int io_send_zc_import(struct io_kiocb *req, struct io_async_msghdr *kmsg) int io_send_zc(struct 
io_kiocb *req, unsigned int issue_flags) { struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg); - struct io_async_msghdr *kmsg; + struct io_async_msghdr *kmsg = req->async_data; struct socket *sock; unsigned msg_flags; int ret, min_ret = 0; @@ -1184,9 +1133,9 @@ int io_send_zc(struct io_kiocb *req, unsigned int issue_flags) if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags)) return -EOPNOTSUPP; - kmsg = io_send_setup(req, issue_flags); - if (IS_ERR(kmsg)) - return PTR_ERR(kmsg); + if (!(req->flags & REQ_F_POLLED) && + (zc->flags & IORING_RECVSEND_POLL_FIRST)) + return -EAGAIN; if (!zc->done_io) { ret = io_send_zc_import(req, kmsg); @@ -1242,7 +1191,7 @@ int io_send_zc(struct io_kiocb *req, unsigned int issue_flags) int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags) { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); - struct io_async_msghdr *kmsg; + struct io_async_msghdr *kmsg = req->async_data; struct socket *sock; unsigned flags; int ret, min_ret = 0; @@ -1255,17 +1204,6 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags) if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags)) return -EOPNOTSUPP; - if (req_has_async_data(req)) { - kmsg = req->async_data; - } else { - kmsg = io_msg_alloc_async(req, issue_flags); - if (unlikely(!kmsg)) - return -ENOMEM; - ret = io_sendmsg_copy_hdr(req, kmsg); - if (ret) - return ret; - } - if (!(req->flags & REQ_F_POLLED) && (sr->flags & IORING_RECVSEND_POLL_FIRST)) return -EAGAIN; diff --git a/io_uring/net.h b/io_uring/net.h index 4b4fd9b1b7b4..f99ebb9dc0bb 100644 --- a/io_uring/net.h +++ b/io_uring/net.h @@ -34,13 +34,11 @@ struct io_async_connect { int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); int io_shutdown(struct io_kiocb *req, unsigned int issue_flags); -int io_sendmsg_prep_async(struct io_kiocb *req); void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req); int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); int io_sendmsg(struct 
io_kiocb *req, unsigned int issue_flags); int io_send(struct io_kiocb *req, unsigned int issue_flags); -int io_sendrecv_prep_async(struct io_kiocb *req); int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags); diff --git a/io_uring/opdef.c b/io_uring/opdef.c index 1368193edc57..dd4a1e1425e1 100644 --- a/io_uring/opdef.c +++ b/io_uring/opdef.c @@ -527,7 +527,6 @@ const struct io_cold_def io_cold_defs[] = { .name = "SENDMSG", #if defined(CONFIG_NET) .async_size = sizeof(struct io_async_msghdr), - .prep_async = io_sendmsg_prep_async, .cleanup = io_sendmsg_recvmsg_cleanup, .fail = io_sendrecv_fail, #endif @@ -603,7 +602,6 @@ const struct io_cold_def io_cold_defs[] = { .async_size = sizeof(struct io_async_msghdr), .cleanup = io_sendmsg_recvmsg_cleanup, .fail = io_sendrecv_fail, - .prep_async = io_sendrecv_prep_async, #endif }, [IORING_OP_RECV] = { @@ -688,7 +686,6 @@ const struct io_cold_def io_cold_defs[] = { .name = "SEND_ZC", #if defined(CONFIG_NET) .async_size = sizeof(struct io_async_msghdr), - .prep_async = io_sendrecv_prep_async, .cleanup = io_send_zc_cleanup, .fail = io_sendrecv_fail, #endif @@ -697,7 +694,6 @@ const struct io_cold_def io_cold_defs[] = { .name = "SENDMSG_ZC", #if defined(CONFIG_NET) .async_size = sizeof(struct io_async_msghdr), - .prep_async = io_sendmsg_prep_async, .cleanup = io_send_zc_cleanup, .fail = io_sendrecv_fail, #endif -- cgit v1.2.3-59-g8ed1b From 75191341785eef51f87ff54b0ed9dfbd5a72e7c2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 16 Mar 2024 15:33:53 -0600 Subject: io_uring/net: add iovec recycling Right now the io_async_msghdr is recycled to avoid the overhead of allocating+freeing it for every request. But the iovec is not included, hence that will be allocated and freed for each transfer regardless. This commit enables recycling of the iovec between io_async_msghdr recycles. 
This avoids alloc+free for each one if an iovec is used, and on top of that, it extends the cache hot nature of msg to the iovec as well. Also enables KASAN for the iovec entries, so that reuse can be detected even while they are in the cache. The io_async_msghdr also shrinks from 376 -> 288 bytes, an 88 byte saving (or ~23% smaller), as the fast_iovec entry is dropped from 8 entries to a single entry. There's no point keeping a big fast iovec entry, if iovecs aren't being allocated and freed continually. Signed-off-by: Jens Axboe --- io_uring/net.c | 131 ++++++++++++++++++++++++++++++++++++--------------------- io_uring/net.h | 13 +++--- 2 files changed, 91 insertions(+), 53 deletions(-) (limited to 'io_uring/net.h') diff --git a/io_uring/net.c b/io_uring/net.c index adc6d6e1cce6..2727b67f6a72 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -115,15 +115,31 @@ static bool io_net_retry(struct socket *sock, int flags) return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET; } +static void io_netmsg_iovec_free(struct io_async_msghdr *kmsg) +{ + if (kmsg->free_iov) { + kfree(kmsg->free_iov); + kmsg->free_iov_nr = 0; + kmsg->free_iov = NULL; + } +} + static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags) { struct io_async_msghdr *hdr = req->async_data; + struct iovec *iov; - if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) + /* can't recycle, ensure we free the iovec if we have one */ + if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) { + io_netmsg_iovec_free(hdr); return; + } /* Let normal cleanup path reap it if we fail adding to the cache */ + iov = hdr->free_iov; if (io_alloc_cache_put(&req->ctx->netmsg_cache, &hdr->cache)) { + if (iov) + kasan_mempool_poison_object(iov); req->async_data = NULL; req->flags &= ~REQ_F_ASYNC_DATA; } @@ -138,7 +154,11 @@ static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req) entry = io_alloc_cache_get(&ctx->netmsg_cache); if (entry) { hdr = container_of(entry, struct io_async_msghdr, 
cache); - hdr->free_iov = NULL; + if (hdr->free_iov) { + kasan_mempool_unpoison_object(hdr->free_iov, + hdr->free_iov_nr * sizeof(struct iovec)); + req->flags |= REQ_F_NEED_CLEANUP; + } req->flags |= REQ_F_ASYNC_DATA; req->async_data = hdr; return hdr; @@ -146,12 +166,27 @@ static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req) if (!io_alloc_async_data(req)) { hdr = req->async_data; + hdr->free_iov_nr = 0; hdr->free_iov = NULL; return hdr; } return NULL; } +/* assign new iovec to kmsg, if we need to */ +static int io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg, + struct iovec *iov) +{ + if (iov) { + req->flags |= REQ_F_NEED_CLEANUP; + kmsg->free_iov_nr = kmsg->msg.msg_iter.nr_segs; + if (kmsg->free_iov) + kfree(kmsg->free_iov); + kmsg->free_iov = iov; + } + return 0; +} + #ifdef CONFIG_COMPAT static int io_compat_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg, @@ -159,7 +194,16 @@ static int io_compat_msg_copy_hdr(struct io_kiocb *req, { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); struct compat_iovec __user *uiov; - int ret; + struct iovec *iov; + int ret, nr_segs; + + if (iomsg->free_iov) { + nr_segs = iomsg->free_iov_nr; + iov = iomsg->free_iov; + } else { + iov = &iomsg->fast_iov; + nr_segs = 1; + } if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg))) return -EFAULT; @@ -168,9 +212,9 @@ static int io_compat_msg_copy_hdr(struct io_kiocb *req, if (req->flags & REQ_F_BUFFER_SELECT) { compat_ssize_t clen; - iomsg->free_iov = NULL; if (msg->msg_iovlen == 0) { - sr->len = 0; + sr->len = iov->iov_len = 0; + iov->iov_base = NULL; } else if (msg->msg_iovlen > 1) { return -EINVAL; } else { @@ -186,14 +230,12 @@ static int io_compat_msg_copy_hdr(struct io_kiocb *req, return 0; } - iomsg->free_iov = iomsg->fast_iov; ret = __import_iovec(ddir, (struct iovec __user *)uiov, msg->msg_iovlen, - UIO_FASTIOV, &iomsg->free_iov, - &iomsg->msg.msg_iter, true); + nr_segs, &iov, &iomsg->msg.msg_iter, true); 
if (unlikely(ret < 0)) return ret; - return 0; + return io_net_vec_assign(req, iomsg, iov); } #endif @@ -201,7 +243,16 @@ static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg, struct user_msghdr *msg, int ddir) { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); - int ret; + struct iovec *iov; + int ret, nr_segs; + + if (iomsg->free_iov) { + nr_segs = iomsg->free_iov_nr; + iov = iomsg->free_iov; + } else { + iov = &iomsg->fast_iov; + nr_segs = 1; + } if (!user_access_begin(sr->umsg, sizeof(*sr->umsg))) return -EFAULT; @@ -217,9 +268,8 @@ static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg, if (req->flags & REQ_F_BUFFER_SELECT) { if (msg->msg_iovlen == 0) { - sr->len = iomsg->fast_iov[0].iov_len = 0; - iomsg->fast_iov[0].iov_base = NULL; - iomsg->free_iov = NULL; + sr->len = iov->iov_len = 0; + iov->iov_base = NULL; } else if (msg->msg_iovlen > 1) { ret = -EINVAL; goto ua_end; @@ -227,10 +277,9 @@ static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg, /* we only need the length for provided buffers */ if (!access_ok(&msg->msg_iov[0].iov_len, sizeof(__kernel_size_t))) goto ua_end; - unsafe_get_user(iomsg->fast_iov[0].iov_len, - &msg->msg_iov[0].iov_len, ua_end); - sr->len = iomsg->fast_iov[0].iov_len; - iomsg->free_iov = NULL; + unsafe_get_user(iov->iov_len, &msg->msg_iov[0].iov_len, + ua_end); + sr->len = iov->iov_len; } ret = 0; ua_end: @@ -239,13 +288,12 @@ ua_end: } user_access_end(); - iomsg->free_iov = iomsg->fast_iov; - ret = __import_iovec(ddir, msg->msg_iov, msg->msg_iovlen, UIO_FASTIOV, - &iomsg->free_iov, &iomsg->msg.msg_iter, false); + ret = __import_iovec(ddir, msg->msg_iov, msg->msg_iovlen, nr_segs, + &iov, &iomsg->msg.msg_iter, false); if (unlikely(ret < 0)) return ret; - return 0; + return io_net_vec_assign(req, iomsg, iov); } static int io_sendmsg_copy_hdr(struct io_kiocb *req, @@ -285,7 +333,7 @@ void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req) { struct 
io_async_msghdr *io = req->async_data; - kfree(io->free_iov); + io_netmsg_iovec_free(io); } static int io_send_setup(struct io_kiocb *req) @@ -366,9 +414,6 @@ static void io_req_msg_cleanup(struct io_kiocb *req, unsigned int issue_flags) { req->flags &= ~REQ_F_NEED_CLEANUP; - /* fast path, check for non-NULL to avoid function call */ - if (kmsg->free_iov) - kfree(kmsg->free_iov); io_netmsg_recycle(req, issue_flags); } @@ -621,11 +666,6 @@ static inline void io_recv_prep_retry(struct io_kiocb *req, { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); - if (kmsg->free_iov) { - kfree(kmsg->free_iov); - kmsg->free_iov = NULL; - } - req->flags &= ~REQ_F_BL_EMPTY; sr->done_io = 0; sr->len = 0; /* get from the provided buffer */ @@ -941,14 +981,10 @@ out_free: void io_send_zc_cleanup(struct io_kiocb *req) { struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg); - struct io_async_msghdr *io; + struct io_async_msghdr *io = req->async_data; - if (req_has_async_data(req)) { - io = req->async_data; - /* might be ->fast_iov if *msg_copy_hdr failed */ - if (io->free_iov != io->fast_iov) - kfree(io->free_iov); - } + if (req_has_async_data(req)) + io_netmsg_iovec_free(io); if (zc->notif) { io_notif_flush(zc->notif); zc->notif = NULL; @@ -1170,8 +1206,7 @@ int io_send_zc(struct io_kiocb *req, unsigned int issue_flags) */ if (!(issue_flags & IO_URING_F_UNLOCKED)) { io_notif_flush(zc->notif); - io_netmsg_recycle(req, issue_flags); - req->flags &= ~REQ_F_NEED_CLEANUP; + io_req_msg_cleanup(req, kmsg, 0); } io_req_set_res(req, ret, IORING_CQE_F_MORE); return IOU_OK; @@ -1221,13 +1256,7 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags) ret = -EINTR; req_set_fail(req); } - /* fast path, check for non-NULL to avoid function call */ - if (kmsg->free_iov) { - kfree(kmsg->free_iov); - kmsg->free_iov = NULL; - } - io_netmsg_recycle(req, issue_flags); if (ret >= 0) ret += sr->done_io; else if (sr->done_io) @@ -1239,7 +1268,7 @@ int io_sendmsg_zc(struct 
io_kiocb *req, unsigned int issue_flags) */ if (!(issue_flags & IO_URING_F_UNLOCKED)) { io_notif_flush(sr->notif); - req->flags &= ~REQ_F_NEED_CLEANUP; + io_req_msg_cleanup(req, kmsg, 0); } io_req_set_res(req, ret, IORING_CQE_F_MORE); return IOU_OK; @@ -1483,6 +1512,14 @@ out: void io_netmsg_cache_free(struct io_cache_entry *entry) { - kfree(container_of(entry, struct io_async_msghdr, cache)); + struct io_async_msghdr *kmsg; + + kmsg = container_of(entry, struct io_async_msghdr, cache); + if (kmsg->free_iov) { + kasan_mempool_unpoison_object(kmsg->free_iov, + kmsg->free_iov_nr * sizeof(struct iovec)); + io_netmsg_iovec_free(kmsg); + } + kfree(kmsg); } #endif diff --git a/io_uring/net.h b/io_uring/net.h index f99ebb9dc0bb..0aef1c992aee 100644 --- a/io_uring/net.h +++ b/io_uring/net.h @@ -8,17 +8,18 @@ struct io_async_msghdr { #if defined(CONFIG_NET) union { - struct iovec fast_iov[UIO_FASTIOV]; + struct iovec fast_iov; struct { - struct iovec fast_iov_one; - __kernel_size_t controllen; - int namelen; - __kernel_size_t payloadlen; + struct io_cache_entry cache; + /* entry size of ->free_iov, if valid */ + int free_iov_nr; }; - struct io_cache_entry cache; }; /* points to an allocated iov, if NULL we use fast_iov instead */ struct iovec *free_iov; + __kernel_size_t controllen; + __kernel_size_t payloadlen; + int namelen; struct sockaddr __user *uaddr; struct msghdr msg; struct sockaddr_storage addr; -- cgit v1.2.3-59-g8ed1b From e2ea5a7069133c01fe3dbda95d77af7f193a1a52 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 18 Mar 2024 20:37:22 -0600 Subject: io_uring/net: move connect to always using async data While doing that, get rid of io_async_connect and just use the generic io_async_msghdr. Both of them have a struct sockaddr_storage in there, and while io_async_msghdr is bigger, if the same type can be used then the netmsg_cache can get reused for connect as well. 
Signed-off-by: Jens Axboe --- io_uring/net.c | 41 +++++++++++------------------------------ io_uring/net.h | 5 ----- io_uring/opdef.c | 3 +-- 3 files changed, 12 insertions(+), 37 deletions(-) (limited to 'io_uring/net.h') diff --git a/io_uring/net.c b/io_uring/net.c index 1e1d77321fce..3bef562b67de 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -1428,17 +1428,10 @@ int io_socket(struct io_kiocb *req, unsigned int issue_flags) return IOU_OK; } -int io_connect_prep_async(struct io_kiocb *req) -{ - struct io_async_connect *io = req->async_data; - struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect); - - return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address); -} - int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect); + struct io_async_msghdr *io; if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in) return -EINVAL; @@ -1446,32 +1439,26 @@ int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); conn->addr_len = READ_ONCE(sqe->addr2); conn->in_progress = conn->seen_econnaborted = false; - return 0; + + io = io_msg_alloc_async(req); + if (unlikely(!io)) + return -ENOMEM; + + return move_addr_to_kernel(conn->addr, conn->addr_len, &io->addr); } int io_connect(struct io_kiocb *req, unsigned int issue_flags) { struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect); - struct io_async_connect __io, *io; + struct io_async_msghdr *io = req->async_data; unsigned file_flags; int ret; bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; - if (req_has_async_data(req)) { - io = req->async_data; - } else { - ret = move_addr_to_kernel(connect->addr, - connect->addr_len, - &__io.address); - if (ret) - goto out; - io = &__io; - } - file_flags = force_nonblock ? 
O_NONBLOCK : 0; - ret = __sys_connect_file(req->file, &io->address, - connect->addr_len, file_flags); + ret = __sys_connect_file(req->file, &io->addr, connect->addr_len, + file_flags); if ((ret == -EAGAIN || ret == -EINPROGRESS || ret == -ECONNABORTED) && force_nonblock) { if (ret == -EINPROGRESS) { @@ -1481,13 +1468,6 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags) goto out; connect->seen_econnaborted = true; } - if (req_has_async_data(req)) - return -EAGAIN; - if (io_alloc_async_data(req)) { - ret = -ENOMEM; - goto out; - } - memcpy(req->async_data, &__io, sizeof(__io)); return -EAGAIN; } if (connect->in_progress) { @@ -1505,6 +1485,7 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags) out: if (ret < 0) req_set_fail(req); + io_req_msg_cleanup(req, issue_flags); io_req_set_res(req, ret, 0); return IOU_OK; } diff --git a/io_uring/net.h b/io_uring/net.h index 0aef1c992aee..b47b43ec6459 100644 --- a/io_uring/net.h +++ b/io_uring/net.h @@ -28,10 +28,6 @@ struct io_async_msghdr { #if defined(CONFIG_NET) -struct io_async_connect { - struct sockaddr_storage address; -}; - int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); int io_shutdown(struct io_kiocb *req, unsigned int issue_flags); @@ -53,7 +49,6 @@ int io_accept(struct io_kiocb *req, unsigned int issue_flags); int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); int io_socket(struct io_kiocb *req, unsigned int issue_flags); -int io_connect_prep_async(struct io_kiocb *req); int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); int io_connect(struct io_kiocb *req, unsigned int issue_flags); diff --git a/io_uring/opdef.c b/io_uring/opdef.c index fcae75a08f2c..1951107210d4 100644 --- a/io_uring/opdef.c +++ b/io_uring/opdef.c @@ -557,8 +557,7 @@ const struct io_cold_def io_cold_defs[] = { [IORING_OP_CONNECT] = { .name = "CONNECT", #if defined(CONFIG_NET) - .async_size = sizeof(struct io_async_connect), - .prep_async = 
io_connect_prep_async, + .async_size = sizeof(struct io_async_msghdr), #endif }, [IORING_OP_FALLOCATE] = { -- cgit v1.2.3-59-g8ed1b From 414d0f45c316221acbf066658afdbae5b354a5cc Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 20 Mar 2024 15:19:44 -0600 Subject: io_uring/alloc_cache: switch to array based caching Currently lists are being used to manage this, but best practice is usually to have these in an array instead as that is cheaper to manage. Outside of that detail, games are also played with KASAN as the list is inside the cached entry itself. Finally, all users of this need a struct io_cache_entry embedded in their struct, which is union'ized with something else in there that isn't used across the free -> realloc cycle. Get rid of all of that, and simply have it be an array. This will not change the memory used, as we're just trading an 8-byte member entry for the per-elem array size. This reduces the overhead of the recycled allocations, and it reduces the amount of code needed to support recycling to about half of what it currently is. 
Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 2 +- io_uring/alloc_cache.h | 57 +++++++++++++++++++----------------------- io_uring/futex.c | 30 +++++++++------------- io_uring/futex.h | 5 ++-- io_uring/io_uring.c | 34 ++++++++++++++----------- io_uring/net.c | 13 ++++------ io_uring/net.h | 18 ++++--------- io_uring/poll.c | 12 +++------ io_uring/poll.h | 9 +------ io_uring/rsrc.c | 10 +++----- io_uring/rsrc.h | 7 +----- io_uring/rw.c | 14 +++++------ io_uring/rw.h | 7 ++---- io_uring/uring_cmd.c | 14 +++-------- io_uring/uring_cmd.h | 6 +---- 15 files changed, 93 insertions(+), 145 deletions(-) (limited to 'io_uring/net.h') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 0f24fdad19c2..ef45b8bd1b35 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -220,7 +220,7 @@ struct io_ev_fd { }; struct io_alloc_cache { - struct io_wq_work_node list; + void **entries; unsigned int nr_cached; unsigned int max_cached; size_t elem_size; diff --git a/io_uring/alloc_cache.h b/io_uring/alloc_cache.h index 138ad14b0b12..b7a38a2069cf 100644 --- a/io_uring/alloc_cache.h +++ b/io_uring/alloc_cache.h @@ -6,61 +6,56 @@ */ #define IO_ALLOC_CACHE_MAX 128 -struct io_cache_entry { - struct io_wq_work_node node; -}; - static inline bool io_alloc_cache_put(struct io_alloc_cache *cache, - struct io_cache_entry *entry) + void *entry) { if (cache->nr_cached < cache->max_cached) { - cache->nr_cached++; - wq_stack_add_head(&entry->node, &cache->list); - kasan_mempool_poison_object(entry); + if (!kasan_mempool_poison_object(entry)) + return false; + cache->entries[cache->nr_cached++] = entry; return true; } return false; } -static inline bool io_alloc_cache_empty(struct io_alloc_cache *cache) -{ - return !cache->list.next; -} - -static inline struct io_cache_entry *io_alloc_cache_get(struct io_alloc_cache *cache) +static inline void *io_alloc_cache_get(struct io_alloc_cache *cache) { - if (cache->list.next) { - 
struct io_cache_entry *entry; + if (cache->nr_cached) { + void *entry = cache->entries[--cache->nr_cached]; - entry = container_of(cache->list.next, struct io_cache_entry, node); kasan_mempool_unpoison_object(entry, cache->elem_size); - cache->list.next = cache->list.next->next; - cache->nr_cached--; return entry; } return NULL; } -static inline void io_alloc_cache_init(struct io_alloc_cache *cache, +/* returns false if the cache was initialized properly */ +static inline bool io_alloc_cache_init(struct io_alloc_cache *cache, unsigned max_nr, size_t size) { - cache->list.next = NULL; - cache->nr_cached = 0; - cache->max_cached = max_nr; - cache->elem_size = size; + cache->entries = kvmalloc_array(max_nr, sizeof(void *), GFP_KERNEL); + if (cache->entries) { + cache->nr_cached = 0; + cache->max_cached = max_nr; + cache->elem_size = size; + return false; + } + return true; } static inline void io_alloc_cache_free(struct io_alloc_cache *cache, - void (*free)(struct io_cache_entry *)) + void (*free)(const void *)) { - while (1) { - struct io_cache_entry *entry = io_alloc_cache_get(cache); + void *entry; + + if (!cache->entries) + return; - if (!entry) - break; + while ((entry = io_alloc_cache_get(cache)) != NULL) free(entry); - } - cache->nr_cached = 0; + + kvfree(cache->entries); + cache->entries = NULL; } #endif diff --git a/io_uring/futex.c b/io_uring/futex.c index 792a03df58de..914848f46beb 100644 --- a/io_uring/futex.c +++ b/io_uring/futex.c @@ -9,7 +9,7 @@ #include "../kernel/futex/futex.h" #include "io_uring.h" -#include "rsrc.h" +#include "alloc_cache.h" #include "futex.h" struct io_futex { @@ -27,27 +27,21 @@ struct io_futex { }; struct io_futex_data { - union { - struct futex_q q; - struct io_cache_entry cache; - }; + struct futex_q q; struct io_kiocb *req; }; -void io_futex_cache_init(struct io_ring_ctx *ctx) -{ - io_alloc_cache_init(&ctx->futex_cache, IO_NODE_ALLOC_CACHE_MAX, - sizeof(struct io_futex_data)); -} +#define IO_FUTEX_ALLOC_CACHE_MAX 32 -static 
void io_futex_cache_entry_free(struct io_cache_entry *entry) +bool io_futex_cache_init(struct io_ring_ctx *ctx) { - kfree(container_of(entry, struct io_futex_data, cache)); + return io_alloc_cache_init(&ctx->futex_cache, IO_FUTEX_ALLOC_CACHE_MAX, + sizeof(struct io_futex_data)); } void io_futex_cache_free(struct io_ring_ctx *ctx) { - io_alloc_cache_free(&ctx->futex_cache, io_futex_cache_entry_free); + io_alloc_cache_free(&ctx->futex_cache, kfree); } static void __io_futex_complete(struct io_kiocb *req, struct io_tw_state *ts) @@ -63,7 +57,7 @@ static void io_futex_complete(struct io_kiocb *req, struct io_tw_state *ts) struct io_ring_ctx *ctx = req->ctx; io_tw_lock(ctx, ts); - if (!io_alloc_cache_put(&ctx->futex_cache, &ifd->cache)) + if (!io_alloc_cache_put(&ctx->futex_cache, ifd)) kfree(ifd); __io_futex_complete(req, ts); } @@ -259,11 +253,11 @@ static void io_futex_wake_fn(struct wake_q_head *wake_q, struct futex_q *q) static struct io_futex_data *io_alloc_ifd(struct io_ring_ctx *ctx) { - struct io_cache_entry *entry; + struct io_futex_data *ifd; - entry = io_alloc_cache_get(&ctx->futex_cache); - if (entry) - return container_of(entry, struct io_futex_data, cache); + ifd = io_alloc_cache_get(&ctx->futex_cache); + if (ifd) + return ifd; return kmalloc(sizeof(struct io_futex_data), GFP_NOWAIT); } diff --git a/io_uring/futex.h b/io_uring/futex.h index 0847e9e8a127..b8bb09873d57 100644 --- a/io_uring/futex.h +++ b/io_uring/futex.h @@ -13,7 +13,7 @@ int io_futex_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd, unsigned int issue_flags); bool io_futex_remove_all(struct io_ring_ctx *ctx, struct task_struct *task, bool cancel_all); -void io_futex_cache_init(struct io_ring_ctx *ctx); +bool io_futex_cache_init(struct io_ring_ctx *ctx); void io_futex_cache_free(struct io_ring_ctx *ctx); #else static inline int io_futex_cancel(struct io_ring_ctx *ctx, @@ -27,8 +27,9 @@ static inline bool io_futex_remove_all(struct io_ring_ctx *ctx, { return false; } -static inline 
void io_futex_cache_init(struct io_ring_ctx *ctx) +static inline bool io_futex_cache_init(struct io_ring_ctx *ctx) { + return false; } static inline void io_futex_cache_free(struct io_ring_ctx *ctx) { diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 579618fad833..1d453eb8e49f 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -276,6 +276,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) { struct io_ring_ctx *ctx; int hash_bits; + bool ret; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) @@ -305,17 +306,19 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) INIT_LIST_HEAD(&ctx->cq_overflow_list); INIT_LIST_HEAD(&ctx->io_buffers_cache); INIT_HLIST_HEAD(&ctx->io_buf_list); - io_alloc_cache_init(&ctx->rsrc_node_cache, IO_NODE_ALLOC_CACHE_MAX, + ret = io_alloc_cache_init(&ctx->rsrc_node_cache, IO_NODE_ALLOC_CACHE_MAX, sizeof(struct io_rsrc_node)); - io_alloc_cache_init(&ctx->apoll_cache, IO_ALLOC_CACHE_MAX, + ret |= io_alloc_cache_init(&ctx->apoll_cache, IO_ALLOC_CACHE_MAX, sizeof(struct async_poll)); - io_alloc_cache_init(&ctx->netmsg_cache, IO_ALLOC_CACHE_MAX, + ret |= io_alloc_cache_init(&ctx->netmsg_cache, IO_ALLOC_CACHE_MAX, sizeof(struct io_async_msghdr)); - io_alloc_cache_init(&ctx->rw_cache, IO_ALLOC_CACHE_MAX, + ret |= io_alloc_cache_init(&ctx->rw_cache, IO_ALLOC_CACHE_MAX, sizeof(struct io_async_rw)); - io_alloc_cache_init(&ctx->uring_cache, IO_ALLOC_CACHE_MAX, + ret |= io_alloc_cache_init(&ctx->uring_cache, IO_ALLOC_CACHE_MAX, sizeof(struct uring_cache)); - io_futex_cache_init(ctx); + ret |= io_futex_cache_init(ctx); + if (ret) + goto err; init_completion(&ctx->ref_comp); xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1); mutex_init(&ctx->uring_lock); @@ -345,6 +348,12 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) return ctx; err: + io_alloc_cache_free(&ctx->rsrc_node_cache, kfree); + io_alloc_cache_free(&ctx->apoll_cache, kfree); + 
io_alloc_cache_free(&ctx->netmsg_cache, io_netmsg_cache_free); + io_alloc_cache_free(&ctx->rw_cache, io_rw_cache_free); + io_alloc_cache_free(&ctx->uring_cache, kfree); + io_futex_cache_free(ctx); kfree(ctx->cancel_table.hbs); kfree(ctx->cancel_table_locked.hbs); xa_destroy(&ctx->io_bl_xa); @@ -1482,7 +1491,7 @@ static void io_free_batch_list(struct io_ring_ctx *ctx, if (apoll->double_poll) kfree(apoll->double_poll); - if (!io_alloc_cache_put(&ctx->apoll_cache, &apoll->cache)) + if (!io_alloc_cache_put(&ctx->apoll_cache, apoll)) kfree(apoll); req->flags &= ~REQ_F_POLLED; } @@ -2778,11 +2787,6 @@ static void io_req_caches_free(struct io_ring_ctx *ctx) mutex_unlock(&ctx->uring_lock); } -static void io_rsrc_node_cache_free(struct io_cache_entry *entry) -{ - kfree(container_of(entry, struct io_rsrc_node, cache)); -} - static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) { io_sq_thread_finish(ctx); @@ -2797,10 +2801,10 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) __io_sqe_files_unregister(ctx); io_cqring_overflow_kill(ctx); io_eventfd_unregister(ctx); - io_alloc_cache_free(&ctx->apoll_cache, io_apoll_cache_free); + io_alloc_cache_free(&ctx->apoll_cache, kfree); io_alloc_cache_free(&ctx->netmsg_cache, io_netmsg_cache_free); io_alloc_cache_free(&ctx->rw_cache, io_rw_cache_free); - io_alloc_cache_free(&ctx->uring_cache, io_uring_cache_free); + io_alloc_cache_free(&ctx->uring_cache, kfree); io_futex_cache_free(ctx); io_destroy_buffers(ctx); mutex_unlock(&ctx->uring_lock); @@ -2816,7 +2820,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) WARN_ON_ONCE(!list_empty(&ctx->rsrc_ref_list)); WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list)); - io_alloc_cache_free(&ctx->rsrc_node_cache, io_rsrc_node_cache_free); + io_alloc_cache_free(&ctx->rsrc_node_cache, kfree); if (ctx->mm_account) { mmdrop(ctx->mm_account); ctx->mm_account = NULL; diff --git a/io_uring/net.c b/io_uring/net.c index 3bef562b67de..d0abc5689066 100644 --- a/io_uring/net.c +++ 
b/io_uring/net.c @@ -137,7 +137,7 @@ static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags) /* Let normal cleanup path reap it if we fail adding to the cache */ iov = hdr->free_iov; - if (io_alloc_cache_put(&req->ctx->netmsg_cache, &hdr->cache)) { + if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr)) { if (iov) kasan_mempool_poison_object(iov); req->async_data = NULL; @@ -148,12 +148,10 @@ static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags) static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; - struct io_cache_entry *entry; struct io_async_msghdr *hdr; - entry = io_alloc_cache_get(&ctx->netmsg_cache); - if (entry) { - hdr = container_of(entry, struct io_async_msghdr, cache); + hdr = io_alloc_cache_get(&ctx->netmsg_cache); + if (hdr) { if (hdr->free_iov) { kasan_mempool_unpoison_object(hdr->free_iov, hdr->free_iov_nr * sizeof(struct iovec)); @@ -1490,11 +1488,10 @@ out: return IOU_OK; } -void io_netmsg_cache_free(struct io_cache_entry *entry) +void io_netmsg_cache_free(const void *entry) { - struct io_async_msghdr *kmsg; + struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry; - kmsg = container_of(entry, struct io_async_msghdr, cache); if (kmsg->free_iov) { kasan_mempool_unpoison_object(kmsg->free_iov, kmsg->free_iov_nr * sizeof(struct iovec)); diff --git a/io_uring/net.h b/io_uring/net.h index b47b43ec6459..0eb1c1920fc9 100644 --- a/io_uring/net.h +++ b/io_uring/net.h @@ -3,23 +3,15 @@ #include #include -#include "alloc_cache.h" - struct io_async_msghdr { #if defined(CONFIG_NET) - union { - struct iovec fast_iov; - struct { - struct io_cache_entry cache; - /* entry size of ->free_iov, if valid */ - int free_iov_nr; - }; - }; + struct iovec fast_iov; /* points to an allocated iov, if NULL we use fast_iov instead */ struct iovec *free_iov; + int free_iov_nr; + int namelen; __kernel_size_t controllen; __kernel_size_t payloadlen; - int namelen; struct 
sockaddr __user *uaddr; struct msghdr msg; struct sockaddr_storage addr; @@ -57,9 +49,9 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags); int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); void io_send_zc_cleanup(struct io_kiocb *req); -void io_netmsg_cache_free(struct io_cache_entry *entry); +void io_netmsg_cache_free(const void *entry); #else -static inline void io_netmsg_cache_free(struct io_cache_entry *entry) +static inline void io_netmsg_cache_free(const void *entry) { } #endif diff --git a/io_uring/poll.c b/io_uring/poll.c index 5d55bbf1de15..0a8e02944689 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -14,6 +14,7 @@ #include #include "io_uring.h" +#include "alloc_cache.h" #include "refs.h" #include "napi.h" #include "opdef.h" @@ -686,17 +687,15 @@ static struct async_poll *io_req_alloc_apoll(struct io_kiocb *req, unsigned issue_flags) { struct io_ring_ctx *ctx = req->ctx; - struct io_cache_entry *entry; struct async_poll *apoll; if (req->flags & REQ_F_POLLED) { apoll = req->apoll; kfree(apoll->double_poll); } else if (!(issue_flags & IO_URING_F_UNLOCKED)) { - entry = io_alloc_cache_get(&ctx->apoll_cache); - if (entry == NULL) + apoll = io_alloc_cache_get(&ctx->apoll_cache); + if (!apoll) goto alloc_apoll; - apoll = container_of(entry, struct async_poll, cache); apoll->poll.retries = APOLL_MAX_RETRY; } else { alloc_apoll: @@ -1055,8 +1054,3 @@ out: io_req_set_res(req, ret, 0); return IOU_OK; } - -void io_apoll_cache_free(struct io_cache_entry *entry) -{ - kfree(container_of(entry, struct async_poll, cache)); -} diff --git a/io_uring/poll.h b/io_uring/poll.h index 1dacae9e816c..5c240f11069a 100644 --- a/io_uring/poll.h +++ b/io_uring/poll.h @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -#include "alloc_cache.h" - enum { IO_APOLL_OK, IO_APOLL_ABORTED, @@ -17,10 +15,7 @@ struct io_poll { }; struct async_poll { - union { - struct io_poll poll; - struct io_cache_entry cache; - }; + struct io_poll poll; 
struct io_poll *double_poll; }; @@ -46,6 +41,4 @@ int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags); bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk, bool cancel_all); -void io_apoll_cache_free(struct io_cache_entry *entry); - void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts); diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 4818b79231dd..7b8a056f98ed 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -13,6 +13,7 @@ #include #include "io_uring.h" +#include "alloc_cache.h" #include "openclose.h" #include "rsrc.h" @@ -169,7 +170,7 @@ static void io_rsrc_put_work(struct io_rsrc_node *node) void io_rsrc_node_destroy(struct io_ring_ctx *ctx, struct io_rsrc_node *node) { - if (!io_alloc_cache_put(&ctx->rsrc_node_cache, &node->cache)) + if (!io_alloc_cache_put(&ctx->rsrc_node_cache, node)) kfree(node); } @@ -197,12 +198,9 @@ void io_rsrc_node_ref_zero(struct io_rsrc_node *node) struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx) { struct io_rsrc_node *ref_node; - struct io_cache_entry *entry; - entry = io_alloc_cache_get(&ctx->rsrc_node_cache); - if (entry) { - ref_node = container_of(entry, struct io_rsrc_node, cache); - } else { + ref_node = io_alloc_cache_get(&ctx->rsrc_node_cache); + if (!ref_node) { ref_node = kzalloc(sizeof(*ref_node), GFP_KERNEL); if (!ref_node) return NULL; diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h index e21000238954..83c079a707f8 100644 --- a/io_uring/rsrc.h +++ b/io_uring/rsrc.h @@ -2,8 +2,6 @@ #ifndef IOU_RSRC_H #define IOU_RSRC_H -#include "alloc_cache.h" - #define IO_NODE_ALLOC_CACHE_MAX 32 #define IO_RSRC_TAG_TABLE_SHIFT (PAGE_SHIFT - 3) @@ -36,10 +34,7 @@ struct io_rsrc_data { }; struct io_rsrc_node { - union { - struct io_cache_entry cache; - struct io_ring_ctx *ctx; - }; + struct io_ring_ctx *ctx; int refs; bool empty; u16 type; diff --git a/io_uring/rw.c b/io_uring/rw.c index e84d322a6150..3134a6ece1be 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ 
-18,6 +18,7 @@ #include "io_uring.h" #include "opdef.h" #include "kbuf.h" +#include "alloc_cache.h" #include "rsrc.h" #include "poll.h" #include "rw.h" @@ -154,7 +155,7 @@ static void io_rw_recycle(struct io_kiocb *req, unsigned int issue_flags) return; } iov = rw->free_iovec; - if (io_alloc_cache_put(&req->ctx->rw_cache, &rw->cache)) { + if (io_alloc_cache_put(&req->ctx->rw_cache, rw)) { if (iov) kasan_mempool_poison_object(iov); req->async_data = NULL; @@ -200,12 +201,10 @@ static void io_req_rw_cleanup(struct io_kiocb *req, unsigned int issue_flags) static int io_rw_alloc_async(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; - struct io_cache_entry *entry; struct io_async_rw *rw; - entry = io_alloc_cache_get(&ctx->rw_cache); - if (entry) { - rw = container_of(entry, struct io_async_rw, cache); + rw = io_alloc_cache_get(&ctx->rw_cache); + if (rw) { if (rw->free_iovec) { kasan_mempool_unpoison_object(rw->free_iovec, rw->free_iov_nr * sizeof(struct iovec)); @@ -1168,11 +1167,10 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin) return nr_events; } -void io_rw_cache_free(struct io_cache_entry *entry) +void io_rw_cache_free(const void *entry) { - struct io_async_rw *rw; + struct io_async_rw *rw = (struct io_async_rw *) entry; - rw = container_of(entry, struct io_async_rw, cache); if (rw->free_iovec) { kasan_mempool_unpoison_object(rw->free_iovec, rw->free_iov_nr * sizeof(struct iovec)); diff --git a/io_uring/rw.h b/io_uring/rw.h index cf51d0eb407a..3f432dc75441 100644 --- a/io_uring/rw.h +++ b/io_uring/rw.h @@ -3,10 +3,7 @@ #include struct io_async_rw { - union { - size_t bytes_done; - struct io_cache_entry cache; - }; + size_t bytes_done; struct iov_iter iter; struct iov_iter_state iter_state; struct iovec fast_iov; @@ -28,4 +25,4 @@ void io_rw_fail(struct io_kiocb *req); void io_req_rw_complete(struct io_kiocb *req, struct io_tw_state *ts); int io_read_mshot_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); int 
io_read_mshot(struct io_kiocb *req, unsigned int issue_flags); -void io_rw_cache_free(struct io_cache_entry *entry); +void io_rw_cache_free(const void *entry); diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index 92346b5d9f5b..334d31dd6628 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -11,18 +11,17 @@ #include #include "io_uring.h" +#include "alloc_cache.h" #include "rsrc.h" #include "uring_cmd.h" static struct uring_cache *io_uring_async_get(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; - struct io_cache_entry *entry; struct uring_cache *cache; - entry = io_alloc_cache_get(&ctx->uring_cache); - if (entry) { - cache = container_of(entry, struct uring_cache, cache); + cache = io_alloc_cache_get(&ctx->uring_cache); + if (cache) { req->flags |= REQ_F_ASYNC_DATA; req->async_data = cache; return cache; @@ -39,7 +38,7 @@ static void io_req_uring_cleanup(struct io_kiocb *req, unsigned int issue_flags) if (issue_flags & IO_URING_F_UNLOCKED) return; - if (io_alloc_cache_put(&req->ctx->uring_cache, &cache->cache)) { + if (io_alloc_cache_put(&req->ctx->uring_cache, cache)) { ioucmd->sqe = NULL; req->async_data = NULL; req->flags &= ~REQ_F_ASYNC_DATA; @@ -354,8 +353,3 @@ int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags) } EXPORT_SYMBOL_GPL(io_uring_cmd_sock); #endif - -void io_uring_cache_free(struct io_cache_entry *entry) -{ - kfree(container_of(entry, struct uring_cache, cache)); -} diff --git a/io_uring/uring_cmd.h b/io_uring/uring_cmd.h index 477ea8865639..a361f98664d2 100644 --- a/io_uring/uring_cmd.h +++ b/io_uring/uring_cmd.h @@ -1,15 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 struct uring_cache { - union { - struct io_cache_entry cache; - struct io_uring_sqe sqes[2]; - }; + struct io_uring_sqe sqes[2]; }; int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags); int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); -void io_uring_cache_free(struct io_cache_entry *entry); 
bool io_uring_try_cancel_uring_cmd(struct io_ring_ctx *ctx, struct task_struct *task, bool cancel_all); -- cgit v1.2.3-59-g8ed1b