diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-01-11 17:14:59 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-01-11 17:14:59 -0800 |
commit | 6ae71436cda740148640046d58190a5bbc3ac86d (patch) | |
tree | d30635c5c06ac114ca9b07bf96c7ea0bb3904f2f /kernel/sched/core_sched.c | |
parent | Merge tag 'Wcast-function-type-5.17-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gustavoars/linux (diff) | |
parent | sched/fair: Replace CFS internal cpu_util() with cpu_util_cfs() (diff) | |
download | linux-dev-6ae71436cda740148640046d58190a5bbc3ac86d.tar.xz linux-dev-6ae71436cda740148640046d58190a5bbc3ac86d.zip |
Merge tag 'sched_core_for_v5.17_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Borislav Petkov:
"Mostly minor things this time; some highlights:
- core-sched: Add 'Forced Idle' accounting; this allows to track how
much CPU time is 'lost' due to core scheduling constraints.
- psi: Fix for MEM_FULL; a task running reclaim would be counted as a
runnable task and prevent MEM_FULL from being reported.
- cpuacct: Long standing fixes for some cgroup accounting issues.
- rt: Bandwidth timer could, under unusual circumstances, be failed
to armed, leading to indefinite throttling."
[ Description above by Peter Zijlstra ]
* tag 'sched_core_for_v5.17_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
sched/fair: Replace CFS internal cpu_util() with cpu_util_cfs()
sched/fair: Cleanup task_util and capacity type
sched/rt: Try to restart rt period timer when rt runtime exceeded
sched/fair: Document the slow path and fast path in select_task_rq_fair
sched/fair: Fix per-CPU kthread and wakee stacking for asym CPU capacity
sched/fair: Fix detection of per-CPU kthreads waking a task
sched/cpuacct: Make user/system times in cpuacct.stat more precise
sched/cpuacct: Fix user/system in shown cpuacct.usage*
cpuacct: Convert BUG_ON() to WARN_ON_ONCE()
cputime, cpuacct: Include guest time in user time in cpuacct.stat
psi: Fix PSI_MEM_FULL state when tasks are in memstall and doing reclaim
sched/core: Forced idle accounting
psi: Add a missing SPDX license header
psi: Remove repeated verbose comment
Diffstat (limited to '')
-rw-r--r-- | kernel/sched/core_sched.c | 66 |
1 files changed, 65 insertions, 1 deletions
diff --git a/kernel/sched/core_sched.c b/kernel/sched/core_sched.c index 517f72b008f5..1fb45672ec85 100644 --- a/kernel/sched/core_sched.c +++ b/kernel/sched/core_sched.c @@ -73,7 +73,7 @@ static unsigned long sched_core_update_cookie(struct task_struct *p, enqueued = sched_core_enqueued(p); if (enqueued) - sched_core_dequeue(rq, p); + sched_core_dequeue(rq, p, DEQUEUE_SAVE); old_cookie = p->core_cookie; p->core_cookie = cookie; @@ -85,6 +85,10 @@ static unsigned long sched_core_update_cookie(struct task_struct *p, * If task is currently running, it may not be compatible anymore after * the cookie change, so enter the scheduler on its CPU to schedule it * away. + * + * Note that it is possible that as a result of this cookie change, the + * core has now entered/left forced idle state. Defer accounting to the + * next scheduling edge, rather than always forcing a reschedule here. */ if (task_running(rq, p)) resched_curr(rq); @@ -232,3 +236,63 @@ out: return err; } +#ifdef CONFIG_SCHEDSTATS + +/* REQUIRES: rq->core's clock recently updated. */ +void __sched_core_account_forceidle(struct rq *rq) +{ + const struct cpumask *smt_mask = cpu_smt_mask(cpu_of(rq)); + u64 delta, now = rq_clock(rq->core); + struct rq *rq_i; + struct task_struct *p; + int i; + + lockdep_assert_rq_held(rq); + + WARN_ON_ONCE(!rq->core->core_forceidle_count); + + if (rq->core->core_forceidle_start == 0) + return; + + delta = now - rq->core->core_forceidle_start; + if (unlikely((s64)delta <= 0)) + return; + + rq->core->core_forceidle_start = now; + + if (WARN_ON_ONCE(!rq->core->core_forceidle_occupation)) { + /* can't be forced idle without a running task */ + } else if (rq->core->core_forceidle_count > 1 || + rq->core->core_forceidle_occupation > 1) { + /* + * For larger SMT configurations, we need to scale the charged + * forced idle amount since there can be more than one forced + * idle sibling and more than one running cookied task. + */ + delta *= rq->core->core_forceidle_count; + delta = div_u64(delta, rq->core->core_forceidle_occupation); + } + + for_each_cpu(i, smt_mask) { + rq_i = cpu_rq(i); + p = rq_i->core_pick ?: rq_i->curr; + + if (!p->core_cookie) + continue; + + __schedstat_add(p->stats.core_forceidle_sum, delta); + } +} + +void __sched_core_tick(struct rq *rq) +{ + if (!rq->core->core_forceidle_count) + return; + + if (rq != rq->core) + update_rq_clock(rq->core); + + __sched_core_account_forceidle(rq); +} + +#endif /* CONFIG_SCHEDSTATS */ |