author     2024-03-11 11:35:31 -0700
committer  2024-03-11 11:35:31 -0700
commit     d2c84bdce25a678c1e1f116d65b58790bd241af0 (patch)
tree       45499e5ded0bec5bc0ac7e305ee19198374a02c4 /io_uring/sqpoll.c
parent     Merge tag 'vfs-6.9.uuid' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs (diff)
parent     io_uring: Fix sqpoll utilization check racing with dying sqpoll (diff)
Merge tag 'for-6.9/io_uring-20240310' of git://git.kernel.dk/linux
Pull io_uring updates from Jens Axboe:
- Make running of task_work internal loops more fair, and unify how the
different methods deal with them (me)
- Support for per-ring NAPI (a hedged registration sketch follows the commit
list below). The two minor networking patches are in a shared branch with
netdev (Stefan)
- Add support for truncate (Tony)
- Export SQPOLL utilization stats (Xiaobing)
- Multishot fixes (Pavel)
- Fix for a race in manipulating the request flags via poll (Pavel)
- Cleanup the multishot checking by making it generic, moving it out of
opcode handlers (Pavel)
- Various tweaks and cleanups (me, Kunwu, Alexander)
* tag 'for-6.9/io_uring-20240310' of git://git.kernel.dk/linux: (53 commits)
io_uring: Fix sqpoll utilization check racing with dying sqpoll
io_uring/net: dedup io_recv_finish req completion
io_uring: refactor DEFER_TASKRUN multishot checks
io_uring: fix mshot io-wq checks
io_uring/net: add io_req_msg_cleanup() helper
io_uring/net: simplify msghd->msg_inq checking
io_uring/kbuf: rename REQ_F_PARTIAL_IO to REQ_F_BL_NO_RECYCLE
io_uring/net: remove dependency on REQ_F_PARTIAL_IO for sr->done_io
io_uring/net: correctly handle multishot recvmsg retry setup
io_uring/net: clear REQ_F_BL_EMPTY in the multishot retry handler
io_uring: fix io_queue_proc modifying req->flags
io_uring: fix mshot read defer taskrun cqe posting
io_uring/net: fix overflow check in io_recvmsg_mshot_prep()
io_uring/net: correct the type of variable
io_uring/sqpoll: statistics of the true utilization of sq threads
io_uring/net: move recv/recvmsg flags out of retry loop
io_uring/kbuf: flag request if buffer pool is empty after buffer pick
io_uring/net: improve the usercopy for sendmsg/recvmsg
io_uring/net: move receive multishot out of the generic msghdr path
io_uring/net: unify how recvmsg and sendmsg copy in the msghdr
...
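The per-ring NAPI support above is opt-in from userspace: an application
registers a busy-poll window on its ring. The sketch below is illustrative
only and is not part of this pull; it assumes the 6.9-era
IORING_REGISTER_NAPI opcode, the struct io_uring_napi layout, and the
matching liburing io_uring_register_napi() helper.

/* Hedged sketch: enable NAPI busy polling on one ring.
 * Assumes IORING_REGISTER_NAPI / struct io_uring_napi as merged for 6.9 and
 * the matching liburing helper; the values below are illustrative.
 */
#include <liburing.h>
#include <stdio.h>

static int enable_ring_napi(struct io_uring *ring)
{
	struct io_uring_napi napi = {
		.busy_poll_to = 50,	/* busy-poll window, microseconds (assumed unit) */
		.prefer_busy_poll = 1,	/* prefer busy polling over IRQ-driven wakeups */
	};
	int ret = io_uring_register_napi(ring, &napi);

	if (ret)
		fprintf(stderr, "IORING_REGISTER_NAPI failed: %d\n", ret);
	return ret;
}

With SQPOLL, the io_napi_sqpoll_busy_poll() call added to __io_sq_thread()
in the diff below is what drives the registered busy-poll window from the sq
thread itself.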
Diffstat (limited to 'io_uring/sqpoll.c')
-rw-r--r--  io_uring/sqpoll.c  59
1 file changed, 56 insertions, 3 deletions
diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c
index 65b5dbe3c850..363052b4ea76 100644
--- a/io_uring/sqpoll.c
+++ b/io_uring/sqpoll.c
@@ -15,9 +15,11 @@
 #include <uapi/linux/io_uring.h>
 
 #include "io_uring.h"
+#include "napi.h"
 #include "sqpoll.h"
 
 #define IORING_SQPOLL_CAP_ENTRIES_VALUE 8
+#define IORING_TW_CAP_ENTRIES_VALUE 8
 
 enum {
 	IO_SQ_THREAD_SHOULD_STOP = 0,
@@ -193,6 +195,9 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
 			ret = io_submit_sqes(ctx, to_submit);
 		mutex_unlock(&ctx->uring_lock);
 
+		if (io_napi(ctx))
+			ret += io_napi_sqpoll_busy_poll(ctx);
+
 		if (to_submit && wq_has_sleeper(&ctx->sqo_sq_wait))
 			wake_up(&ctx->sqo_sq_wait);
 		if (creds)
@@ -219,10 +224,52 @@ static bool io_sqd_handle_event(struct io_sq_data *sqd)
 	return did_sig || test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
 }
 
+/*
+ * Run task_work, processing the retry_list first. The retry_list holds
+ * entries that we passed on in the previous run, if we had more task_work
+ * than we were asked to process. Newly queued task_work isn't run until the
+ * retry list has been fully processed.
+ */
+static unsigned int io_sq_tw(struct llist_node **retry_list, int max_entries)
+{
+	struct io_uring_task *tctx = current->io_uring;
+	unsigned int count = 0;
+
+	if (*retry_list) {
+		*retry_list = io_handle_tw_list(*retry_list, &count, max_entries);
+		if (count >= max_entries)
+			return count;
+		max_entries -= count;
+	}
+
+	*retry_list = tctx_task_work_run(tctx, max_entries, &count);
+	return count;
+}
+
+static bool io_sq_tw_pending(struct llist_node *retry_list)
+{
+	struct io_uring_task *tctx = current->io_uring;
+
+	return retry_list || !llist_empty(&tctx->task_list);
+}
+
+static void io_sq_update_worktime(struct io_sq_data *sqd, struct rusage *start)
+{
+	struct rusage end;
+
+	getrusage(current, RUSAGE_SELF, &end);
+	end.ru_stime.tv_sec -= start->ru_stime.tv_sec;
+	end.ru_stime.tv_usec -= start->ru_stime.tv_usec;
+
+	sqd->work_time += end.ru_stime.tv_usec + end.ru_stime.tv_sec * 1000000;
+}
+
 static int io_sq_thread(void *data)
 {
+	struct llist_node *retry_list = NULL;
 	struct io_sq_data *sqd = data;
 	struct io_ring_ctx *ctx;
+	struct rusage start;
 	unsigned long timeout = 0;
 	char buf[TASK_COMM_LEN];
 	DEFINE_WAIT(wait);
@@ -251,18 +298,21 @@ static int io_sq_thread(void *data)
 		}
 
 		cap_entries = !list_is_singular(&sqd->ctx_list);
+		getrusage(current, RUSAGE_SELF, &start);
 		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
 			int ret = __io_sq_thread(ctx, cap_entries);
 
 			if (!sqt_spin && (ret > 0 || !wq_list_empty(&ctx->iopoll_list)))
 				sqt_spin = true;
 		}
-		if (io_run_task_work())
+		if (io_sq_tw(&retry_list, IORING_TW_CAP_ENTRIES_VALUE))
 			sqt_spin = true;
 
 		if (sqt_spin || !time_after(jiffies, timeout)) {
-			if (sqt_spin)
+			if (sqt_spin) {
+				io_sq_update_worktime(sqd, &start);
 				timeout = jiffies + sqd->sq_thread_idle;
+			}
 			if (unlikely(need_resched())) {
 				mutex_unlock(&sqd->lock);
 				cond_resched();
@@ -273,7 +323,7 @@ static int io_sq_thread(void *data)
 		}
 
 		prepare_to_wait(&sqd->wait, &wait, TASK_INTERRUPTIBLE);
-		if (!io_sqd_events_pending(sqd) && !task_work_pending(current)) {
+		if (!io_sqd_events_pending(sqd) && !io_sq_tw_pending(retry_list)) {
 			bool needs_sched = true;
 
 			list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
@@ -312,6 +362,9 @@ static int io_sq_thread(void *data)
 			timeout = jiffies + sqd->sq_thread_idle;
 	}
 
+	if (retry_list)
+		io_sq_tw(&retry_list, UINT_MAX);
+
 	io_uring_cancel_generic(true, sqd);
 	sqd->thread = NULL;
 	list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
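The io_sq_update_worktime() hunk above charges system CPU time to
sqd->work_time only on iterations where the sq thread actually found work
(sqt_spin), which is what makes the exported utilization figure meaningful.
A hedged userspace sketch of consuming those statistics follows; the
SqTotalTime/SqWorkTime fdinfo keys are assumed from the "statistics of the
true utilization of sq threads" patch and are not shown in this diff.

/* Hedged sketch: estimate SQPOLL utilization for a ring fd from procfs.
 * Assumes the fdinfo keys SqTotalTime/SqWorkTime (microseconds) added by the
 * sq-thread statistics patch; verify the exact names against io_uring/fdinfo.c.
 */
#include <stdio.h>

static int sqpoll_utilization(int ring_fd, double *util)
{
	char path[64], line[256];
	unsigned long long total = 0, work = 0;
	FILE *f;

	snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", ring_fd);
	f = fopen(path, "r");
	if (!f)
		return -1;
	while (fgets(line, sizeof(line), f)) {
		sscanf(line, "SqTotalTime: %llu", &total);
		sscanf(line, "SqWorkTime: %llu", &work);
	}
	fclose(f);
	if (!total)
		return -1;
	*util = (double)work / (double)total;	/* fraction of sq thread time doing real work */
	return 0;
}

Utilization here is work time divided by total sq thread time, assuming both
counters are cumulative since ring setup; the merged fix "io_uring: Fix sqpoll
utilization check racing with dying sqpoll" concerns the kernel-side
accounting, not this read side.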