From eda09706b240ca9129ac4e1fbb4eb1e2bc67aadc Mon Sep 17 00:00:00 2001 From: Michal Koutný Date: Wed, 3 Nov 2021 17:58:45 +0100 Subject: cgroup: rstat: Mark benign data race to silence KCSAN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a race between updaters and flushers (flush can possibly miss the latest update(s)). This is expected as explained in cgroup_rstat_updated() comment, add also machine readable annotation so that KCSAN results aren't noisy. Reported-by: Hao Sun Link: https://lore.kernel.org/r/CACkBjsbPVdkub=e-E-p1WBOLxS515ith-53SFdmFHWV_QMo40w@mail.gmail.com Suggested-by: Hao Sun Signed-off-by: Michal Koutný Reviewed-by: Shakeel Butt Signed-off-by: Tejun Heo --- kernel/cgroup/rstat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index 1486768f2318..1abe74114527 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -35,7 +35,7 @@ void cgroup_rstat_updated(struct cgroup *cgrp, int cpu) * instead of NULL, we can tell whether @cgrp is on the list by * testing the next pointer for NULL. */ - if (cgroup_rstat_cpu(cgrp, cpu)->updated_next) + if (data_race(cgroup_rstat_cpu(cgrp, cpu)->updated_next)) return; raw_spin_lock_irqsave(cpu_lock, flags); -- cgit v1.2.3-59-g8ed1b From 8291471ea5f1b2e6782cbb9c6ed785f12435245f Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Sat, 27 Nov 2021 14:59:19 +0000 Subject: cgroup: get the wrong css for css_alloc() during cgroup_init_subsys() css_alloc() needs the parent css, while cgroup_css() gets current cgropu's css. So we are getting the wrong css during cgroup_init_subsys(). Fortunately, cgrp_dfl_root.cgrp's css is not set yet, so the value we pass to css_alloc() is NULL anyway. Let's pass NULL directly during init, since we know there is no parent yet. Signed-off-by: Wei Yang Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 919194de39c8..f522cee8e650 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5711,7 +5711,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early) /* Create the root cgroup state for this subsystem */ ss->root = &cgrp_dfl_root; - css = ss->css_alloc(cgroup_css(&cgrp_dfl_root.cgrp, ss)); + css = ss->css_alloc(NULL); /* We don't handle early failures gracefully */ BUG_ON(IS_ERR(css)); init_and_link_css(css, ss, &cgrp_dfl_root.cgrp); -- cgit v1.2.3-59-g8ed1b From 1f1562fcd04a485734e94390660e741c3be47867 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Sun, 5 Dec 2021 13:32:14 -0500 Subject: cgroup/cpuset: Don't let child cpusets restrict parent in default hierarchy In validate_change(), there is a check since v2.6.12 to make sure that each of the child cpusets must be a subset of a parent cpuset. IOW, it allows child cpusets to restrict what changes can be made to a parent's "cpuset.cpus". This actually violates one of the core principles of the default hierarchy where a cgroup higher up in the hierarchy should be able to change configuration however it sees fit as deligation breaks down otherwise. To address this issue, the check is now removed for the default hierarchy to free parent cpusets from being restricted by child cpusets. The check will still apply for legacy hierarchy. Suggested-by: Tejun Heo Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index d0e163a02099..0dd7d853ed17 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -616,19 +616,11 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) struct cpuset *c, *par; int ret; - rcu_read_lock(); - - /* Each of our child cpusets must be a subset of us */ - ret = -EBUSY; - cpuset_for_each_child(c, css, cur) - if (!is_cpuset_subset(c, trial)) - goto out; - - /* Remaining checks don't apply to root cpuset */ - ret = 0; + /* The checks don't apply to root cpuset */ if (cur == &top_cpuset) - goto out; + return 0; + rcu_read_lock(); par = parent_cs(cur); /* On legacy hierarchy, we must be a subset of our parent cpuset. */ -- cgit v1.2.3-59-g8ed1b From 1815775e74541d7b498c0baf15726ad3d1247abf Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 14 Dec 2021 00:46:07 +0000 Subject: cgroup: return early if it is already on preloaded list If a cset is already on preloaded list, this means we have already setup this cset properly for migration. This patch just relocates the root cgrp lookup which isn't used anyway when the cset is already on the preloaded list. [tj@kernel.org: rephrase the commit log] Signed-off-by: Wei Yang Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index f522cee8e650..4f77bf1eaf9f 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2650,11 +2650,11 @@ void cgroup_migrate_add_src(struct css_set *src_cset, if (src_cset->dead) return; - src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root); - if (!list_empty(&src_cset->mg_preload_node)) return; + src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root); + WARN_ON(src_cset->mg_src_cgrp); WARN_ON(src_cset->mg_dst_cgrp); WARN_ON(!list_empty(&src_cset->mg_tasks)); -- cgit v1.2.3-59-g8ed1b From 0da41f7348fff193d01d031ce255088fa98324b7 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Sat, 25 Dec 2021 00:09:31 +0000 Subject: cgroup: rstat: explicitly put loop variant in while MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of do while unconditionally, let's put the loop variant in while. Signed-off-by: Wei Yang Reviewed-by: Michal Koutný Signed-off-by: Tejun Heo --- kernel/cgroup/rstat.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index 1abe74114527..bc6993258271 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -124,12 +124,10 @@ static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos, prstatc = cgroup_rstat_cpu(parent, cpu); nextp = &prstatc->updated_children; - while (true) { + while (*nextp != pos) { struct cgroup_rstat_cpu *nrstatc; nrstatc = cgroup_rstat_cpu(*nextp, cpu); - if (*nextp == pos) - break; WARN_ON_ONCE(*nextp == parent); nextp = &nrstatc->updated_next; } -- cgit v1.2.3-59-g8ed1b From f5f60d235e7058da13a643c33fc7599c05ec0b73 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Sat, 25 Dec 2021 00:09:32 +0000 Subject: cgroup/rstat: check updated_next only for root MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit dc26532aed0a ("cgroup: rstat: punt root-level optimization to individual controllers"), each rstat on updated_children list has its ->updated_next not NULL. This means we can remove the check on ->updated_next, if we make sure the subtree from @root is on list, which could be done by checking updated_next for root. tj: Coding style fixes. Signed-off-by: Wei Yang Reviewed-by: Michal Koutný Signed-off-by: Tejun Heo --- kernel/cgroup/rstat.c | 47 +++++++++++++++++++++++------------------------ 1 file changed, 23 insertions(+), 24 deletions(-) (limited to 'kernel') diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index bc6993258271..9d331ba44870 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -88,6 +88,7 @@ static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos, struct cgroup *root, int cpu) { struct cgroup_rstat_cpu *rstatc; + struct cgroup *parent; if (pos == root) return NULL; @@ -96,10 +97,14 @@ static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos, * We're gonna walk down to the first leaf and visit/remove it. We * can pick whatever unvisited node as the starting point. */ - if (!pos) + if (!pos) { pos = root; - else + /* return NULL if this subtree is not on-list */ + if (!cgroup_rstat_cpu(pos, cpu)->updated_next) + return NULL; + } else { pos = cgroup_parent(pos); + } /* walk down to the first leaf */ while (true) { @@ -115,31 +120,25 @@ static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos, * However, due to the way we traverse, @pos will be the first * child in most cases. The only exception is @root. */ - if (rstatc->updated_next) { - struct cgroup *parent = cgroup_parent(pos); - - if (parent) { - struct cgroup_rstat_cpu *prstatc; - struct cgroup **nextp; - - prstatc = cgroup_rstat_cpu(parent, cpu); - nextp = &prstatc->updated_children; - while (*nextp != pos) { - struct cgroup_rstat_cpu *nrstatc; - - nrstatc = cgroup_rstat_cpu(*nextp, cpu); - WARN_ON_ONCE(*nextp == parent); - nextp = &nrstatc->updated_next; - } - *nextp = rstatc->updated_next; - } + parent = cgroup_parent(pos); + if (parent) { + struct cgroup_rstat_cpu *prstatc; + struct cgroup **nextp; - rstatc->updated_next = NULL; - return pos; + prstatc = cgroup_rstat_cpu(parent, cpu); + nextp = &prstatc->updated_children; + while (*nextp != pos) { + struct cgroup_rstat_cpu *nrstatc; + + nrstatc = cgroup_rstat_cpu(*nextp, cpu); + WARN_ON_ONCE(*nextp == parent); + nextp = &nrstatc->updated_next; + } + *nextp = rstatc->updated_next; } - /* only happens for @root */ - return NULL; + rstatc->updated_next = NULL; + return pos; } /* see cgroup_rstat_flush() */ -- cgit v1.2.3-59-g8ed1b From d4296faebd337e5f76c0fddb815de33d2b0ad118 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Sun, 19 Dec 2021 10:41:54 +0800 Subject: cpuset: convert 'allowed' in __cpuset_node_allowed() to be boolean Convert 'allowed' in __cpuset_node_allowed() to be boolean since the return types of node_isset() and __cpuset_node_allowed() are both boolean. Signed-off-by: Qi Zheng Signed-off-by: Tejun Heo --- kernel/cgroup/cpuset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 0dd7d853ed17..dc653ab26e50 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -3528,7 +3528,7 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs) bool __cpuset_node_allowed(int node, gfp_t gfp_mask) { struct cpuset *cs; /* current cpuset ancestors */ - int allowed; /* is allocation in zone z allowed? */ + bool allowed; /* is allocation in zone z allowed? */ unsigned long flags; if (in_interrupt()) -- cgit v1.2.3-59-g8ed1b