aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux/psi_types.h
diff options
context:
space:
mode:
author: Johannes Weiner <hannes@cmpxchg.org> 2020-03-16 15:13:31 -0400
committer: Peter Zijlstra <peterz@infradead.org> 2020-03-20 13:06:18 +0100
commit: b05e75d611380881e73edc58a20fd8c6bb71720b (patch)
tree: 3d641b57b42e934d7518f22a13f3a74cd76f6ff7 /include/linux/psi_types.h
parent: sched/core: Distribute tasks within affinity masks (diff)
download: linux-dev-b05e75d611380881e73edc58a20fd8c6bb71720b.tar.xz
download: linux-dev-b05e75d611380881e73edc58a20fd8c6bb71720b.zip
psi: Fix cpu.pressure for cpu.max and competing cgroups
For simplicity, cpu pressure is defined as having more than one runnable task on a given CPU. This works on the system-level, but it has limitations in a cgrouped reality: When cpu.max is in use, it doesn't capture the time in which a task is not executing on the CPU due to throttling. Likewise, it doesn't capture the time in which a competing cgroup is occupying the CPU - meaning it only reflects cgroup-internal competitive pressure, not outside pressure.

Enable tracking of currently executing tasks, and then change the definition of cpu pressure in a cgroup from NR_RUNNING > 1 to NR_RUNNING > ON_CPU which will capture the effects of cpu.max as well as competition from outside the cgroup.

After this patch, a cgroup running `stress -c 1` with a cpu.max setting of 5000 10000 shows ~50% continuous CPU pressure.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200316191333.115523-2-hannes@cmpxchg.org
Diffstat (limited to 'include/linux/psi_types.h')
-rw-r--r-- include/linux/psi_types.h 10
1 files changed, 9 insertions, 1 deletions
diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
index 07aaf9b82241..4b7258495a04 100644
--- a/include/linux/psi_types.h
+++ b/include/linux/psi_types.h
@@ -14,13 +14,21 @@ enum psi_task_count {
NR_IOWAIT,
NR_MEMSTALL,
NR_RUNNING,
- NR_PSI_TASK_COUNTS = 3,
+ /*
+ * This can't have values other than 0 or 1 and could be
+ * implemented as a bit flag. But for now we still have room
+ * in the first cacheline of psi_group_cpu, and this way we
+ * don't have to special case any state tracking for it.
+ */
+ NR_ONCPU,
+ NR_PSI_TASK_COUNTS = 4,
};
/* Task state bitmasks */
#define TSK_IOWAIT (1 << NR_IOWAIT)
#define TSK_MEMSTALL (1 << NR_MEMSTALL)
#define TSK_RUNNING (1 << NR_RUNNING)
+#define TSK_ONCPU (1 << NR_ONCPU)
/* Resources that workloads could be stalled on */
enum psi_res {