path: root/kernel
diff options
authorNeilBrown <neilb@suse.de>2015-02-13 15:49:17 +1100
committerIngo Molnar <mingo@kernel.org>2015-02-18 14:27:44 +0100
commit9cff8adeaa34b5d2802f03f89803da57856b3b72 (patch)
treeb23f52785c2f1fef1577a4cdf238389bec7e63c1 /kernel
parentsched/completion: Serialize completion_done() with complete() (diff)
sched: Prevent recursion in io_schedule()
io_schedule() calls blk_flush_plug() which, depending on the contents of current->plug, can initiate arbitrary blk-io requests. Note that this contrasts with blk_schedule_flush_plug() which requires all non-trivial work to be handed off to a separate thread. This makes it possible for io_schedule() to recurse, and initiating block requests could possibly call mempool_alloc() which, in times of memory pressure, uses io_schedule(). Apart from any stack usage issues, io_schedule() will not behave correctly when called recursively as delayacct_blkio_start() does not allow for repeated calls. So: - use ->in_iowait to detect recursion. Set it earlier, and restore it to the old value. - move the call to "raw_rq" after the call to blk_flush_plug(). As this is some sort of per-cpu thing, we want some chance that we are on the right CPU - When io_schedule() is called recurively, use blk_schedule_flush_plug() which cannot further recurse. - as this makes io_schedule() a lot more complex and as io_schedule() must match io_schedule_timeout(), but all the changes in io_schedule_timeout() and make io_schedule a simple wrapper for that. Signed-off-by: NeilBrown <neilb@suse.de> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> [ Moved the now rudimentary io_schedule() into sched.h. ] Cc: Jens Axboe <axboe@kernel.dk> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Tony Battersby <tonyb@cybernetics.com> Link: http://lkml.kernel.org/r/20150213162600.059fffb2@notabene.brown Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
1 files changed, 12 insertions, 19 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c314000f5e52..daaea922f482 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4358,36 +4358,29 @@ EXPORT_SYMBOL_GPL(yield_to);
* This task is about to go to sleep on IO. Increment rq->nr_iowait so
* that process accounting knows that this is a task in IO wait state.
-void __sched io_schedule(void)
- struct rq *rq = raw_rq();
- delayacct_blkio_start();
- atomic_inc(&rq->nr_iowait);
- blk_flush_plug(current);
- current->in_iowait = 1;
- schedule();
- current->in_iowait = 0;
- atomic_dec(&rq->nr_iowait);
- delayacct_blkio_end();
long __sched io_schedule_timeout(long timeout)
- struct rq *rq = raw_rq();
+ int old_iowait = current->in_iowait;
+ struct rq *rq;
long ret;
+ current->in_iowait = 1;
+ if (old_iowait)
+ blk_schedule_flush_plug(current);
+ else
+ blk_flush_plug(current);
+ rq = raw_rq();
- blk_flush_plug(current);
- current->in_iowait = 1;
ret = schedule_timeout(timeout);
- current->in_iowait = 0;
+ current->in_iowait = old_iowait;
return ret;
* sys_sched_get_priority_max - return maximum RT priority.