From fa2c3254d7cfff5f7a916ab928a562d1165f17bb Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Thu, 20 Jan 2022 16:25:19 +0000 Subject: sched/tracing: Don't re-read p->state when emitting sched_switch event As of commit c6e7bd7afaeb ("sched/core: Optimize ttwu() spinning on p->on_cpu") the following sequence becomes possible: p->__state = TASK_INTERRUPTIBLE; __schedule() deactivate_task(p); ttwu() READ !p->on_rq p->__state=TASK_WAKING trace_sched_switch() __trace_sched_switch_state() task_state_index() return 0; TASK_WAKING isn't in TASK_REPORT, so the task appears as TASK_RUNNING in the trace event. Prevent this by pushing the value read from __schedule() down the trace event. Reported-by: Abhijeet Dharmapurikar Signed-off-by: Valentin Schneider Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Steven Rostedt (Google) Link: https://lore.kernel.org/r/20220120162520.570782-2-valentin.schneider@arm.com --- kernel/sched/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/sched/core.c') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ef946123e9af..3aafc15da24a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4836,7 +4836,7 @@ static struct rq *finish_task_switch(struct task_struct *prev) { struct rq *rq = this_rq(); struct mm_struct *mm = rq->prev_mm; - long prev_state; + unsigned int prev_state; /* * The previous task will have left us with a preempt_count of 2 @@ -6300,7 +6300,7 @@ static void __sched notrace __schedule(unsigned int sched_mode) migrate_disable_switch(rq, prev); psi_sched_switch(prev, next, !task_on_rq_queued(prev)); - trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next); + trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev_state, prev, next); /* Also unlocks the rq: */ rq = context_switch(rq, prev, next, &rf); -- cgit v1.2.3-59-g8ed1b