about | summary | refs | log | tree | commit | diff | stats | homepage
path: root/kernel
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2025-03-24 16:49:40 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2025-03-24 16:49:40 -0700
commit 94dc216ad848ebee06ce7692fcfcbb2e9b3e643c (patch)
tree e7fac9f7be8a6b7f363706665a7f76c5e607e6d2 /kernel
parent Merge tag 'wq-for-6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq (diff)
parent cgroup: rstat: Cleanup flushing functions and locking (diff)
download wireguard-linux-94dc216ad848ebee06ce7692fcfcbb2e9b3e643c.tar.xz
wireguard-linux-94dc216ad848ebee06ce7692fcfcbb2e9b3e643c.zip
Merge tag 'cgroup-for-6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo:

 - Add deprecation info messages to cgroup1-only features

 - rstat updates including a bug fix and breaking up a critical section
   to reduce interrupt latency impact

 - Other misc and doc updates

* tag 'cgroup-for-6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup: rstat: Cleanup flushing functions and locking
  cgroup/rstat: avoid disabling irqs for O(num_cpu)
  mm: Fix a build breakage in memcontrol-v1.c
  blk-cgroup: Simplify policy files registration
  cgroup: Update file naming comment
  cgroup: Add deprecation message to legacy freezer controller
  mm: Add transformation message for per-memcg swappiness
  RFC cgroup/cpuset-v1: Add deprecation messages to sched_relax_domain_level
  cgroup/cpuset-v1: Add deprecation messages to memory_migrate
  cgroup/cpuset-v1: Add deprecation messages to mem_exclusive and mem_hardwall
  cgroup: Print message when /proc/cgroups is read on v2-only system
  cgroup/blkio: Add deprecation messages to reset_stats
  cgroup/cpuset-v1: Add deprecation messages to memory_spread_page and memory_spread_slab
  cgroup/cpuset-v1: Add deprecation messages to sched_load_balance and memory_pressure_enabled
  cgroup, docs: Be explicit about independence of RT_GROUP_SCHED and non-cpu controllers
  cgroup/rstat: Fix forceidle time in cpu.stat
  cgroup/misc: Remove unused misc_cg_res_total_usage
  cgroup/cpuset: Move procfs cpuset attribute under cgroup-v1.c
  cgroup: update comment about dropping cgroup kn refs
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/cgroup/cgroup-internal.h 1
-rw-r--r-- kernel/cgroup/cgroup-v1.c 7
-rw-r--r-- kernel/cgroup/cgroup.c 6
-rw-r--r-- kernel/cgroup/cpuset-v1.c 49
-rw-r--r-- kernel/cgroup/cpuset.c 45
-rw-r--r-- kernel/cgroup/legacy_freezer.c 6
-rw-r--r-- kernel/cgroup/misc.c 16
-rw-r--r-- kernel/cgroup/rstat.c 116
8 files changed, 100 insertions, 146 deletions
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index c964dd7ff967..95ab39e1ec8f 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -168,6 +168,7 @@ struct cgroup_mgctx {
extern struct cgroup_subsys *cgroup_subsys[];
extern struct list_head cgroup_roots;
+extern bool cgrp_dfl_visible;
/* iterate across the hierarchies */
#define for_each_root(root) \
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index e28d5f0d20ed..11ea8d24ac72 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -673,6 +673,7 @@ struct cftype cgroup1_base_files[] = {
int proc_cgroupstats_show(struct seq_file *m, void *v)
{
struct cgroup_subsys *ss;
+ bool cgrp_v1_visible = false;
int i;
seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
@@ -684,12 +685,18 @@ int proc_cgroupstats_show(struct seq_file *m, void *v)
for_each_subsys(ss, i) {
if (cgroup1_subsys_absent(ss))
continue;
+ cgrp_v1_visible |= ss->root != &cgrp_dfl_root;
+
seq_printf(m, "%s\t%d\t%d\t%d\n",
ss->legacy_name, ss->root->hierarchy_id,
atomic_read(&ss->root->nr_cgrps),
cgroup_ssid_enabled(i));
}
+ if (cgrp_dfl_visible && !cgrp_v1_visible)
+ pr_info_once("/proc/cgroups lists only v1 controllers, use cgroup.controllers of root cgroup for v2 info\n");
+
+
return 0;
}
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index afc665b7b1fe..f231fe3a0744 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -171,7 +171,7 @@ EXPORT_SYMBOL_GPL(cgrp_dfl_root);
* The default hierarchy always exists but is hidden until mounted for the
* first time. This is for backward compatibility.
*/
-static bool cgrp_dfl_visible;
+bool cgrp_dfl_visible;
/* some controllers are not supported in the default hierarchy */
static u16 cgrp_dfl_inhibit_ss_mask;
@@ -4447,7 +4447,7 @@ int cgroup_rm_cftypes(struct cftype *cfts)
* function currently returns 0 as long as @cfts registration is successful
* even if some file creation attempts on existing cgroups fail.
*/
-static int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
+int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
{
int ret;
@@ -5831,7 +5831,7 @@ int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode)
}
/*
- * This extra ref will be put in cgroup_free_fn() and guarantees
+ * This extra ref will be put in css_free_rwork_fn() and guarantees
* that @cgrp->kn is always accessible.
*/
kernfs_get(cgrp->kn);
diff --git a/kernel/cgroup/cpuset-v1.c b/kernel/cgroup/cpuset-v1.c
index 25c1d7b77e2f..b69a7db67090 100644
--- a/kernel/cgroup/cpuset-v1.c
+++ b/kernel/cgroup/cpuset-v1.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later
+#include "cgroup-internal.h"
#include "cpuset-internal.h"
/*
@@ -175,6 +176,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
switch (type) {
case FILE_SCHED_RELAX_DOMAIN_LEVEL:
+ pr_info_once("cpuset.%s is deprecated\n", cft->name);
retval = update_relax_domain_level(cs, val);
break;
default:
@@ -373,6 +375,46 @@ out:
return ret;
}
+#ifdef CONFIG_PROC_PID_CPUSET
+/*
+ * proc_cpuset_show()
+ * - Print tasks cpuset path into seq_file.
+ * - Used for /proc/<pid>/cpuset.
+ */
+int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *tsk)
+{
+ char *buf;
+ struct cgroup_subsys_state *css;
+ int retval;
+
+ retval = -ENOMEM;
+ buf = kmalloc(PATH_MAX, GFP_KERNEL);
+ if (!buf)
+ goto out;
+
+ rcu_read_lock();
+ spin_lock_irq(&css_set_lock);
+ css = task_css(tsk, cpuset_cgrp_id);
+ retval = cgroup_path_ns_locked(css->cgroup, buf, PATH_MAX,
+ current->nsproxy->cgroup_ns);
+ spin_unlock_irq(&css_set_lock);
+ rcu_read_unlock();
+
+ if (retval == -E2BIG)
+ retval = -ENAMETOOLONG;
+ if (retval < 0)
+ goto out_free;
+ seq_puts(m, buf);
+ seq_putc(m, '\n');
+ retval = 0;
+out_free:
+ kfree(buf);
+out:
+ return retval;
+}
+#endif /* CONFIG_PROC_PID_CPUSET */
+
static u64 cpuset_read_u64(struct cgroup_subsys_state *css, struct cftype *cft)
{
struct cpuset *cs = css_cs(css);
@@ -424,24 +466,31 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
retval = cpuset_update_flag(CS_CPU_EXCLUSIVE, cs, val);
break;
case FILE_MEM_EXCLUSIVE:
+ pr_info_once("cpuset.%s is deprecated\n", cft->name);
retval = cpuset_update_flag(CS_MEM_EXCLUSIVE, cs, val);
break;
case FILE_MEM_HARDWALL:
+ pr_info_once("cpuset.%s is deprecated\n", cft->name);
retval = cpuset_update_flag(CS_MEM_HARDWALL, cs, val);
break;
case FILE_SCHED_LOAD_BALANCE:
+ pr_info_once("cpuset.%s is deprecated, use cpuset.cpus.partition instead\n", cft->name);
retval = cpuset_update_flag(CS_SCHED_LOAD_BALANCE, cs, val);
break;
case FILE_MEMORY_MIGRATE:
+ pr_info_once("cpuset.%s is deprecated\n", cft->name);
retval = cpuset_update_flag(CS_MEMORY_MIGRATE, cs, val);
break;
case FILE_MEMORY_PRESSURE_ENABLED:
+ pr_info_once("cpuset.%s is deprecated, use memory.pressure with CONFIG_PSI instead\n", cft->name);
cpuset_memory_pressure_enabled = !!val;
break;
case FILE_SPREAD_PAGE:
+ pr_info_once("cpuset.%s is deprecated\n", cft->name);
retval = cpuset_update_flag(CS_SPREAD_PAGE, cs, val);
break;
case FILE_SPREAD_SLAB:
+ pr_warn_once("cpuset.%s is deprecated\n", cft->name);
retval = cpuset_update_flag(CS_SPREAD_SLAB, cs, val);
break;
default:
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 0f910c828973..5a637292faa2 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -21,7 +21,6 @@
* License. See the file COPYING in the main directory of the Linux
* distribution for more details.
*/
-#include "cgroup-internal.h"
#include "cpuset-internal.h"
#include <linux/init.h>
@@ -4244,50 +4243,6 @@ void cpuset_print_current_mems_allowed(void)
rcu_read_unlock();
}
-#ifdef CONFIG_PROC_PID_CPUSET
-/*
- * proc_cpuset_show()
- * - Print tasks cpuset path into seq_file.
- * - Used for /proc/<pid>/cpuset.
- * - No need to task_lock(tsk) on this tsk->cpuset reference, as it
- * doesn't really matter if tsk->cpuset changes after we read it,
- * and we take cpuset_mutex, keeping cpuset_attach() from changing it
- * anyway.
- */
-int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns,
- struct pid *pid, struct task_struct *tsk)
-{
- char *buf;
- struct cgroup_subsys_state *css;
- int retval;
-
- retval = -ENOMEM;
- buf = kmalloc(PATH_MAX, GFP_KERNEL);
- if (!buf)
- goto out;
-
- rcu_read_lock();
- spin_lock_irq(&css_set_lock);
- css = task_css(tsk, cpuset_cgrp_id);
- retval = cgroup_path_ns_locked(css->cgroup, buf, PATH_MAX,
- current->nsproxy->cgroup_ns);
- spin_unlock_irq(&css_set_lock);
- rcu_read_unlock();
-
- if (retval == -E2BIG)
- retval = -ENAMETOOLONG;
- if (retval < 0)
- goto out_free;
- seq_puts(m, buf);
- seq_putc(m, '\n');
- retval = 0;
-out_free:
- kfree(buf);
-out:
- return retval;
-}
-#endif /* CONFIG_PROC_PID_CPUSET */
-
/* Display task mems_allowed in /proc/<pid>/status file. */
void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
{
diff --git a/kernel/cgroup/legacy_freezer.c b/kernel/cgroup/legacy_freezer.c
index 074653f964c1..039d1eb2f215 100644
--- a/kernel/cgroup/legacy_freezer.c
+++ b/kernel/cgroup/legacy_freezer.c
@@ -430,9 +430,11 @@ static ssize_t freezer_write(struct kernfs_open_file *of,
if (strcmp(buf, freezer_state_strs(0)) == 0)
freeze = false;
- else if (strcmp(buf, freezer_state_strs(CGROUP_FROZEN)) == 0)
+ else if (strcmp(buf, freezer_state_strs(CGROUP_FROZEN)) == 0) {
+ pr_info_once("Freezing with imperfect legacy cgroup freezer. "
+ "See cgroup.freeze of cgroup v2\n");
freeze = true;
- else
+ } else
return -EINVAL;
freezer_change_state(css_freezer(of_css(of)), freeze);
diff --git a/kernel/cgroup/misc.c b/kernel/cgroup/misc.c
index 0e26068995a6..2fa3a4fb2aaf 100644
--- a/kernel/cgroup/misc.c
+++ b/kernel/cgroup/misc.c
@@ -68,22 +68,6 @@ static inline bool valid_type(enum misc_res_type type)
}
/**
- * misc_cg_res_total_usage() - Get the current total usage of the resource.
- * @type: misc res type.
- *
- * Context: Any context.
- * Return: Current total usage of the resource.
- */
-u64 misc_cg_res_total_usage(enum misc_res_type type)
-{
- if (valid_type(type))
- return atomic64_read(&root_cg.res[type].usage);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(misc_cg_res_total_usage);
-
-/**
* misc_cg_set_capacity() - Set the capacity of the misc cgroup res.
* @type: Type of the misc res.
* @capacity: Supported capacity of the misc res on the host.
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index aac91466279f..4bb587d5d34f 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -299,40 +299,6 @@ static inline void __cgroup_rstat_unlock(struct cgroup *cgrp, int cpu_in_loop)
spin_unlock_irq(&cgroup_rstat_lock);
}
-/* see cgroup_rstat_flush() */
-static void cgroup_rstat_flush_locked(struct cgroup *cgrp)
- __releases(&cgroup_rstat_lock) __acquires(&cgroup_rstat_lock)
-{
- int cpu;
-
- lockdep_assert_held(&cgroup_rstat_lock);
-
- for_each_possible_cpu(cpu) {
- struct cgroup *pos = cgroup_rstat_updated_list(cgrp, cpu);
-
- for (; pos; pos = pos->rstat_flush_next) {
- struct cgroup_subsys_state *css;
-
- cgroup_base_stat_flush(pos, cpu);
- bpf_rstat_flush(pos, cgroup_parent(pos), cpu);
-
- rcu_read_lock();
- list_for_each_entry_rcu(css, &pos->rstat_css_list,
- rstat_css_node)
- css->ss->css_rstat_flush(css, cpu);
- rcu_read_unlock();
- }
-
- /* play nice and yield if necessary */
- if (need_resched() || spin_needbreak(&cgroup_rstat_lock)) {
- __cgroup_rstat_unlock(cgrp, cpu);
- if (!cond_resched())
- cpu_relax();
- __cgroup_rstat_lock(cgrp, cpu);
- }
- }
-}
-
/**
* cgroup_rstat_flush - flush stats in @cgrp's subtree
* @cgrp: target cgroup
@@ -348,38 +314,30 @@ static void cgroup_rstat_flush_locked(struct cgroup *cgrp)
*/
__bpf_kfunc void cgroup_rstat_flush(struct cgroup *cgrp)
{
+ int cpu;
+
might_sleep();
+ for_each_possible_cpu(cpu) {
+ struct cgroup *pos = cgroup_rstat_updated_list(cgrp, cpu);
- __cgroup_rstat_lock(cgrp, -1);
- cgroup_rstat_flush_locked(cgrp);
- __cgroup_rstat_unlock(cgrp, -1);
-}
+ /* Reacquire for each CPU to avoid disabling IRQs too long */
+ __cgroup_rstat_lock(cgrp, cpu);
+ for (; pos; pos = pos->rstat_flush_next) {
+ struct cgroup_subsys_state *css;
-/**
- * cgroup_rstat_flush_hold - flush stats in @cgrp's subtree and hold
- * @cgrp: target cgroup
- *
- * Flush stats in @cgrp's subtree and prevent further flushes. Must be
- * paired with cgroup_rstat_flush_release().
- *
- * This function may block.
- */
-void cgroup_rstat_flush_hold(struct cgroup *cgrp)
- __acquires(&cgroup_rstat_lock)
-{
- might_sleep();
- __cgroup_rstat_lock(cgrp, -1);
- cgroup_rstat_flush_locked(cgrp);
-}
+ cgroup_base_stat_flush(pos, cpu);
+ bpf_rstat_flush(pos, cgroup_parent(pos), cpu);
-/**
- * cgroup_rstat_flush_release - release cgroup_rstat_flush_hold()
- * @cgrp: cgroup used by tracepoint
- */
-void cgroup_rstat_flush_release(struct cgroup *cgrp)
- __releases(&cgroup_rstat_lock)
-{
- __cgroup_rstat_unlock(cgrp, -1);
+ rcu_read_lock();
+ list_for_each_entry_rcu(css, &pos->rstat_css_list,
+ rstat_css_node)
+ css->ss->css_rstat_flush(css, cpu);
+ rcu_read_unlock();
+ }
+ __cgroup_rstat_unlock(cgrp, cpu);
+ if (!cond_resched())
+ cpu_relax();
+ }
}
int cgroup_rstat_init(struct cgroup *cgrp)
@@ -612,36 +570,34 @@ static void cgroup_force_idle_show(struct seq_file *seq, struct cgroup_base_stat
void cgroup_base_stat_cputime_show(struct seq_file *seq)
{
struct cgroup *cgrp = seq_css(seq)->cgroup;
- u64 usage, utime, stime, ntime;
+ struct cgroup_base_stat bstat;
if (cgroup_parent(cgrp)) {
- cgroup_rstat_flush_hold(cgrp);
- usage = cgrp->bstat.cputime.sum_exec_runtime;
+ cgroup_rstat_flush(cgrp);
+ __cgroup_rstat_lock(cgrp, -1);
+ bstat = cgrp->bstat;
cputime_adjust(&cgrp->bstat.cputime, &cgrp->prev_cputime,
- &utime, &stime);
- ntime = cgrp->bstat.ntime;
- cgroup_rstat_flush_release(cgrp);
+ &bstat.cputime.utime, &bstat.cputime.stime);
+ __cgroup_rstat_unlock(cgrp, -1);
} else {
- /* cgrp->bstat of root is not actually used, reuse it */
- root_cgroup_cputime(&cgrp->bstat);
- usage = cgrp->bstat.cputime.sum_exec_runtime;
- utime = cgrp->bstat.cputime.utime;
- stime = cgrp->bstat.cputime.stime;
- ntime = cgrp->bstat.ntime;
+ root_cgroup_cputime(&bstat);
}
- do_div(usage, NSEC_PER_USEC);
- do_div(utime, NSEC_PER_USEC);
- do_div(stime, NSEC_PER_USEC);
- do_div(ntime, NSEC_PER_USEC);
+ do_div(bstat.cputime.sum_exec_runtime, NSEC_PER_USEC);
+ do_div(bstat.cputime.utime, NSEC_PER_USEC);
+ do_div(bstat.cputime.stime, NSEC_PER_USEC);
+ do_div(bstat.ntime, NSEC_PER_USEC);
seq_printf(seq, "usage_usec %llu\n"
"user_usec %llu\n"
"system_usec %llu\n"
"nice_usec %llu\n",
- usage, utime, stime, ntime);
+ bstat.cputime.sum_exec_runtime,
+ bstat.cputime.utime,
+ bstat.cputime.stime,
+ bstat.ntime);
- cgroup_force_idle_show(seq, &cgrp->bstat);
+ cgroup_force_idle_show(seq, &bstat);
}
/* Add bpf kfuncs for cgroup_rstat_updated() and cgroup_rstat_flush() */