aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/sched
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2019-10-22 10:25:58 -0600
committerJens Axboe <axboe@kernel.dk>2019-10-29 12:43:00 -0600
commit771b53d033e8663abdf59704806aa856b236dcdb (patch)
tree4b1d0bdf8a64787aed08b2e4992d20553d2b5888 /kernel/sched
parentio_uring: Fix mm_fault with READ/WRITE_FIXED (diff)
downloadlinux-dev-771b53d033e8663abdf59704806aa856b236dcdb.tar.xz
linux-dev-771b53d033e8663abdf59704806aa856b236dcdb.zip
io-wq: small threadpool implementation for io_uring
This adds support for io-wq, a smaller and specialized thread pool implementation. This is meant to replace workqueues for io_uring. Among the reasons for this addition are: - We can assign memory context smarter and more persistently if we manage the life time of threads. - We can drop various work-arounds we have in io_uring, like the async_list. - We can implement hashed work insertion, to manage concurrency of buffered writes without needing a) an extra workqueue, or b) needlessly making the concurrency of said workqueue very low which hurts performance of multiple buffered file writers. - We can implement cancel through signals, for cancelling interruptible work like read/write (or send/recv) to/from sockets. - We need the above cancel for being able to assign and use file tables from a process. - We can implement a more thorough cancel operation in general. - We need it to move towards a syslet/threadlet model for even faster async execution. For that we need to take ownership of the used threads. This list is just off the top of my head. Performance should be the same, or better, at least that's what I've seen in my testing. io-wq supports basic NUMA functionality, setting up a pool per node. io-wq hooks up to the scheduler schedule in/out just like workqueue and uses that to drive the need for more/less workers. Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'kernel/sched')
-rw-r--r--kernel/sched/core.c16
1 files changed, 12 insertions, 4 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index dd05a378631a..a95a2f05f3b5 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -16,6 +16,7 @@
#include <asm/tlb.h>
#include "../workqueue_internal.h"
+#include "../../fs/io-wq.h"
#include "../smpboot.h"
#include "pelt.h"
@@ -4103,9 +4104,12 @@ static inline void sched_submit_work(struct task_struct *tsk)
* we disable preemption to avoid it calling schedule() again
* in the possible wakeup of a kworker.
*/
- if (tsk->flags & PF_WQ_WORKER) {
+ if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) {
preempt_disable();
- wq_worker_sleeping(tsk);
+ if (tsk->flags & PF_WQ_WORKER)
+ wq_worker_sleeping(tsk);
+ else
+ io_wq_worker_sleeping(tsk);
preempt_enable_no_resched();
}
@@ -4122,8 +4126,12 @@ static inline void sched_submit_work(struct task_struct *tsk)
static void sched_update_worker(struct task_struct *tsk)
{
- if (tsk->flags & PF_WQ_WORKER)
- wq_worker_running(tsk);
+ if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) {
+ if (tsk->flags & PF_WQ_WORKER)
+ wq_worker_running(tsk);
+ else
+ io_wq_worker_running(tsk);
+ }
}
asmlinkage __visible void __sched schedule(void)