aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux/psi_types.h
diff options
context:
space:
mode:
author: Brian Chen <brianchen118@gmail.com> 2021-11-10 21:33:12 +0000
committer: Peter Zijlstra <peterz@infradead.org> 2021-11-17 14:49:00 +0100
commit: cb0e52b7748737b2cf6481fdd9b920ce7e1ebbdf (patch)
tree: 0f5b8c2029f541fe9beff7ba050f1c085f48c8b2 /include/linux/psi_types.h
parent: sched/core: Forced idle accounting (diff)
download: linux-dev-cb0e52b7748737b2cf6481fdd9b920ce7e1ebbdf.tar.xz
download: linux-dev-cb0e52b7748737b2cf6481fdd9b920ce7e1ebbdf.zip
psi: Fix PSI_MEM_FULL state when tasks are in memstall and doing reclaim
We've noticed cases where tasks in a cgroup are stalled on memory but
there is little memory FULL pressure since tasks stay on the runqueue
in reclaim.

A simple example involves a single threaded program that keeps leaking
and touching large amounts of memory. It runs in a cgroup with swap
enabled, memory.high set at 10M and cpu.max ratio set at 5%. Though
there is significant CPU pressure and memory SOME, there is barely any
memory FULL since the task enters reclaim and stays on the runqueue.
However, this memory-bound task is effectively stalled on memory and
we expect memory FULL to match memory SOME in this scenario.

The code is confused about memstall && running, thinking there is a
stalled task and a productive task when there's only one task: a
reclaimer that's counted as both. To fix this, we redefine the
condition for PSI_MEM_FULL to check that all running tasks are in an
active memstall instead of checking that there are no running tasks.

        case PSI_MEM_FULL:
    -           return unlikely(tasks[NR_MEMSTALL] && !tasks[NR_RUNNING]);
    +           return unlikely(tasks[NR_MEMSTALL] &&
    +                   tasks[NR_RUNNING] == tasks[NR_MEMSTALL_RUNNING]);

This will capture reclaimers. It will also capture tasks that called
psi_memstall_enter() and are about to sleep, but this should be
negligible noise.

Signed-off-by: Brian Chen <brianchen118@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Link: https://lore.kernel.org/r/20211110213312.310243-1-brianchen118@gmail.com
Diffstat (limited to 'include/linux/psi_types.h')
-rw-r--r-- include/linux/psi_types.h 13
1 files changed, 12 insertions, 1 deletions
diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
index bf50068d5d4b..516c0fe836fd 100644
--- a/include/linux/psi_types.h
+++ b/include/linux/psi_types.h
@@ -22,7 +22,17 @@ enum psi_task_count {
* don't have to special case any state tracking for it.
*/
NR_ONCPU,
- NR_PSI_TASK_COUNTS = 4,
+ /*
+ * For IO and CPU stalls the presence of running/oncpu tasks
+ * in the domain means a partial rather than a full stall.
+ * For memory it's not so simple because of page reclaimers:
+ * they are running/oncpu while representing a stall. To tell
+ * whether a domain has productivity left or not, we need to
+ * distinguish between regular running (i.e. productive)
+ * threads and memstall ones.
+ */
+ NR_MEMSTALL_RUNNING,
+ NR_PSI_TASK_COUNTS = 5,
};
/* Task state bitmasks */
@@ -30,6 +40,7 @@ enum psi_task_count {
#define TSK_MEMSTALL (1 << NR_MEMSTALL)
#define TSK_RUNNING (1 << NR_RUNNING)
#define TSK_ONCPU (1 << NR_ONCPU)
+#define TSK_MEMSTALL_RUNNING (1 << NR_MEMSTALL_RUNNING)
/* Resources that workloads could be stalled on */
enum psi_res {