Merge branch 'for-5.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup updates from Tejun Heo: "This includes Roman's cgroup2 freezer implementation. It's a separate machanism from cgroup1 freezer. Instead of blocking user tasks in arbitrary uninterruptible sleeps, the new implementation extends jobctl stop - frozen tasks are trapped in jobctl stop until thawed and can be killed and ptraced. Lots of thanks to Oleg for sheperding the effort. Other than that, there are a few trivial changes" * 'for-5.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: cgroup: never call do_group_exit() with task->frozen bit set kernel: cgroup: fix misuse of %x cgroup: get rid of cgroup_freezer_frozen_exit() cgroup: prevent spurious transition into non-frozen state cgroup: Remove unused cgrp variable cgroup: document cgroup v2 freezer interface cgroup: add tracing points for cgroup v2 freezer cgroup: make TRACE_CGROUP_PATH irq-safe kselftests: cgroup: add freezer controller self-tests kselftests: cgroup: don't fail on cg_kill_all() error in cg_destroy() cgroup: cgroup v2 freezer cgroup: protect cgroup->nr_(dying_)descendants by css_set_lock cgroup: implement __cgroup_task_count() helper cgroup: rename freezer.c into legacy_freezer.c cgroup: remove extra cgroup_migrate_finish() call
author: Linus Torvalds <torvalds@linux-foundation.org> 2019-05-09 13:52:12 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2019-05-09 13:52:12 -0700
commit: abde77eb5c66b2f98539c4644b54f34b7e179e6b (patch)
tree: cc6f6142641397e895fba9a19d0971f3827d7422
parent: Merge branch 'for-5.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq (diff)
parent: cgroup: never call do_group_exit() with task->frozen bit set (diff)
20 files changed, 2010 insertions, 441 deletions
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 20f92c16ffbf..88e746074252 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -864,6 +864,8 @@ All cgroup core files are prefixed with "cgroup."
 	  populated
 		1 if the cgroup or its descendants contains any live
 		processes; otherwise, 0.
+	  frozen
+		1 if the cgroup is frozen; otherwise, 0.
 
   cgroup.max.descendants
 	A read-write single value files.  The default is "max".
@@ -897,6 +899,31 @@ All cgroup core files are prefixed with "cgroup."
 		A dying cgroup can consume system resources not exceeding
 		limits, which were active at the moment of cgroup deletion.
 
+  cgroup.freeze
+	A read-write single value file which exists on non-root cgroups.
+	Allowed values are "0" and "1". The default is "0".
+
+	Writing "1" to the file causes freezing of the cgroup and all
+	descendant cgroups. This means that all belonging processes will
+	be stopped and will not run until the cgroup will be explicitly
+	unfrozen. Freezing of the cgroup may take some time; when this action
+	is completed, the "frozen" value in the cgroup.events control file
+	will be updated to "1" and the corresponding notification will be
+	issued.
+
+	A cgroup can be frozen either by its own settings, or by settings
+	of any ancestor cgroups. If any of ancestor cgroups is frozen, the
+	cgroup will remain frozen.
+
+	Processes in the frozen cgroup can be killed by a fatal signal.
+	They also can enter and leave a frozen cgroup: either by an explicit
+	move by a user, or if freezing of the cgroup races with fork().
+	If a process is moved to a frozen cgroup, it stops. If a process is
+	moved out of a frozen cgroup, it becomes running.
+
+	Frozen status of a cgroup doesn't affect any cgroup tree operations:
+	it's possible to delete a frozen (and empty) cgroup, as well as
+	create new sub-cgroups.
 
 Controllers
 ===========
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 1c70803e9f77..77258d276f93 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -65,6 +65,12 @@ enum {
 	 * specified at mount time and thus is implemented here.
 	 */
 	CGRP_CPUSET_CLONE_CHILDREN,
+
+	/* Control group has to be frozen. */
+	CGRP_FREEZE,
+
+	/* Cgroup is frozen. */
+	CGRP_FROZEN,
 };
 
 /* cgroup_root->flags */
@@ -317,6 +323,25 @@ struct cgroup_rstat_cpu {
 	struct cgroup *updated_next;		/* NULL iff not on the list */
 };
 
+struct cgroup_freezer_state {
+	/* Should the cgroup and its descendants be frozen. */
+	bool freeze;
+
+	/* Should the cgroup actually be frozen? */
+	int e_freeze;
+
+	/* Fields below are protected by css_set_lock */
+
+	/* Number of frozen descendant cgroups */
+	int nr_frozen_descendants;
+
+	/*
+	 * Number of tasks, which are counted as frozen:
+	 * frozen, SIGSTOPped, and PTRACEd.
+	 */
+	int nr_frozen_tasks;
+};
+
 struct cgroup {
 	/* self css with NULL ->ss, points back to this cgroup */
 	struct cgroup_subsys_state self;
@@ -349,6 +374,11 @@ struct cgroup {
 	 * Dying cgroups are cgroups which were deleted by a user,
 	 * but are still existing because someone else is holding a reference.
 	 * max_descendants is a maximum allowed number of descent cgroups.
+	 *
+	 * nr_descendants and nr_dying_descendants are protected
+	 * by cgroup_mutex and css_set_lock. It's fine to read them holding
+	 * any of cgroup_mutex and css_set_lock; for writing both locks
+	 * should be held.
 	 */
 	int nr_descendants;
 	int nr_dying_descendants;
@@ -448,6 +478,9 @@ struct cgroup {
 	/* If there is block congestion on this cgroup. */
 	atomic_t congestion_count;
 
+	/* Used to store internal freezer state */
+	struct cgroup_freezer_state freezer;
+
 	/* ids of the ancestors at each level including self */
 	int ancestor_ids[];
 };
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 81f58b4a5418..c0077adeea83 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -881,4 +881,47 @@ static inline void put_cgroup_ns(struct cgroup_namespace *ns)
 		free_cgroup_ns(ns);
 }
 
+#ifdef CONFIG_CGROUPS
+
+void cgroup_enter_frozen(void);
+void cgroup_leave_frozen(bool always_leave);
+void cgroup_update_frozen(struct cgroup *cgrp);
+void cgroup_freeze(struct cgroup *cgrp, bool freeze);
+void cgroup_freezer_migrate_task(struct task_struct *task, struct cgroup *src,
+				 struct cgroup *dst);
+
+static inline bool cgroup_task_freeze(struct task_struct *task)
+{
+	bool ret;
+
+	if (task->flags & PF_KTHREAD)
+		return false;
+
+	rcu_read_lock();
+	ret = test_bit(CGRP_FREEZE, &task_dfl_cgroup(task)->flags);
+	rcu_read_unlock();
+
+	return ret;
+}
+
+static inline bool cgroup_task_frozen(struct task_struct *task)
+{
+	return task->frozen;
+}
+
+#else /* !CONFIG_CGROUPS */
+
+static inline void cgroup_enter_frozen(void) { }
+static inline void cgroup_leave_frozen(bool always_leave) { }
+static inline bool cgroup_task_freeze(struct task_struct *task)
+{
+	return false;
+}
+static inline bool cgroup_task_frozen(struct task_struct *task)
+{
+	return false;
+}
+
+#endif /* !CONFIG_CGROUPS */
+
 #endif /* _LINUX_CGROUP_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 50606a6e73d6..a2cd15855bad 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -726,6 +726,8 @@ struct task_struct {
 #ifdef CONFIG_CGROUPS
 	/* disallow userland-initiated cgroup migration */
 	unsigned			no_cgroup_migration:1;
+	/* task is frozen/stopped (used by the cgroup freezer) */
+	unsigned			frozen:1;
 #endif
 #ifdef CONFIG_BLK_CGROUP
 	/* to be used once the psi infrastructure lands upstream. */
diff --git a/include/linux/sched/jobctl.h b/include/linux/sched/jobctl.h
index 98228bd48aee..fa067de9f1a9 100644
--- a/include/linux/sched/jobctl.h
+++ b/include/linux/sched/jobctl.h
@@ -18,6 +18,7 @@ struct task_struct;
 #define JOBCTL_TRAP_NOTIFY_BIT	20	/* trap for NOTIFY */
 #define JOBCTL_TRAPPING_BIT	21	/* switching to TRACED */
 #define JOBCTL_LISTENING_BIT	22	/* ptracer is listening for events */
+#define JOBCTL_TRAP_FREEZE_BIT	23	/* trap for cgroup freezer */
 
 #define JOBCTL_STOP_DEQUEUED	(1UL << JOBCTL_STOP_DEQUEUED_BIT)
 #define JOBCTL_STOP_PENDING	(1UL << JOBCTL_STOP_PENDING_BIT)
@@ -26,6 +27,7 @@ struct task_struct;
 #define JOBCTL_TRAP_NOTIFY	(1UL << JOBCTL_TRAP_NOTIFY_BIT)
 #define JOBCTL_TRAPPING		(1UL << JOBCTL_TRAPPING_BIT)
 #define JOBCTL_LISTENING	(1UL << JOBCTL_LISTENING_BIT)
+#define JOBCTL_TRAP_FREEZE	(1UL << JOBCTL_TRAP_FREEZE_BIT)
 
 #define JOBCTL_TRAP_MASK	(JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY)
 #define JOBCTL_PENDING_MASK	(JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK)
diff --git a/include/trace/events/cgroup.h b/include/trace/events/cgroup.h
index a401ff5e7847..a566cc521476 100644
--- a/include/trace/events/cgroup.h
+++ b/include/trace/events/cgroup.h
@@ -103,6 +103,20 @@ DEFINE_EVENT(cgroup, cgroup_rename,
 	TP_ARGS(cgrp, path)
 );
 
+DEFINE_EVENT(cgroup, cgroup_freeze,
+
+	TP_PROTO(struct cgroup *cgrp, const char *path),
+
+	TP_ARGS(cgrp, path)
+);
+
+DEFINE_EVENT(cgroup, cgroup_unfreeze,
+
+	TP_PROTO(struct cgroup *cgrp, const char *path),
+
+	TP_ARGS(cgrp, path)
+);
+
 DECLARE_EVENT_CLASS(cgroup_migrate,
 
 	TP_PROTO(struct cgroup *dst_cgrp, const char *path,
@@ -149,6 +163,47 @@ DEFINE_EVENT(cgroup_migrate, cgroup_transfer_tasks,
 	TP_ARGS(dst_cgrp, path, task, threadgroup)
 );
 
+DECLARE_EVENT_CLASS(cgroup_event,
+
+	TP_PROTO(struct cgroup *cgrp, const char *path, int val),
+
+	TP_ARGS(cgrp, path, val),
+
+	TP_STRUCT__entry(
+		__field(	int,		root			)
+		__field(	int,		id			)
+		__field(	int,		level			)
+		__string(	path,		path			)
+		__field(	int,		val			)
+	),
+
+	TP_fast_assign(
+		__entry->root = cgrp->root->hierarchy_id;
+		__entry->id = cgrp->id;
+		__entry->level = cgrp->level;
+		__assign_str(path, path);
+		__entry->val = val;
+	),
+
+	TP_printk("root=%d id=%d level=%d path=%s val=%d",
+		  __entry->root, __entry->id, __entry->level, __get_str(path),
+		  __entry->val)
+);
+
+DEFINE_EVENT(cgroup_event, cgroup_notify_populated,
+
+	TP_PROTO(struct cgroup *cgrp, const char *path, int val),
+
+	TP_ARGS(cgrp, path, val)
+);
+
+DEFINE_EVENT(cgroup_event, cgroup_notify_frozen,
+
+	TP_PROTO(struct cgroup *cgrp, const char *path, int val),
+
+	TP_ARGS(cgrp, path, val)
+);
+
 #endif /* _TRACE_CGROUP_H */
 
 /* This part must be outside protection */
diff --git a/kernel/cgroup/Makefile b/kernel/cgroup/Makefile
index bfcdae896122..5d7a76bfbbb7 100644
--- a/kernel/cgroup/Makefile
+++ b/kernel/cgroup/Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
-obj-y := cgroup.o rstat.o namespace.o cgroup-v1.o
+obj-y := cgroup.o rstat.o namespace.o cgroup-v1.o freezer.o
 
-obj-$(CONFIG_CGROUP_FREEZER) += freezer.o
+obj-$(CONFIG_CGROUP_FREEZER) += legacy_freezer.o
 obj-$(CONFIG_CGROUP_PIDS) += pids.o
 obj-$(CONFIG_CGROUP_RDMA) += rdma.o
 obj-$(CONFIG_CPUSETS) += cpuset.o
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index 30e39f3932ad..809e34a3c017 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -28,12 +28,15 @@ extern void __init enable_debug_cgroup(void);
 #define TRACE_CGROUP_PATH(type, cgrp, ...)				\
 	do {								\
 		if (trace_cgroup_##type##_enabled()) {			\
-			spin_lock(&trace_cgroup_path_lock);		\
+			unsigned long flags;				\
+			spin_lock_irqsave(&trace_cgroup_path_lock,	\
+					  flags);			\
 			cgroup_path(cgrp, trace_cgroup_path,		\
 				    TRACE_CGROUP_PATH_LEN);		\
 			trace_cgroup_##type(cgrp, trace_cgroup_path,	\
 					    ##__VA_ARGS__);		\
-			spin_unlock(&trace_cgroup_path_lock);		\
+			spin_unlock_irqrestore(&trace_cgroup_path_lock, \
+					       flags);			\
 		}							\
 	} while (0)
 
@@ -240,6 +243,7 @@ int cgroup_rmdir(struct kernfs_node *kn);
 int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
 		     struct kernfs_root *kf_root);
 
+int __cgroup_task_count(const struct cgroup *cgrp);
 int cgroup_task_count(const struct cgroup *cgrp);
 
 /*
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index c126b34fd4ff..68ca5de7ec27 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -342,22 +342,6 @@ static struct cgroup_pidlist *cgroup_pidlist_find_create(struct cgroup *cgrp,
 	return l;
 }
 
-/**
- * cgroup_task_count - count the number of tasks in a cgroup.
- * @cgrp: the cgroup in question
- */
-int cgroup_task_count(const struct cgroup *cgrp)
-{
-	int count = 0;
-	struct cgrp_cset_link *link;
-
-	spin_lock_irq(&css_set_lock);
-	list_for_each_entry(link, &cgrp->cset_links, cset_link)
-		count += link->cset->nr_tasks;
-	spin_unlock_irq(&css_set_lock);
-	return count;
-}
-
 /*
  * Load a cgroup's pidarray with either procs' tgids or tasks' pids
  */
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 3f2b4bde0f9c..327f37c9fdfa 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -593,6 +593,39 @@ static void cgroup_get_live(struct cgroup *cgrp)
 	css_get(&cgrp->self);
 }
 
+/**
+ * __cgroup_task_count - count the number of tasks in a cgroup. The caller
+ * is responsible for taking the css_set_lock.
+ * @cgrp: the cgroup in question
+ */
+int __cgroup_task_count(const struct cgroup *cgrp)
+{
+	int count = 0;
+	struct cgrp_cset_link *link;
+
+	lockdep_assert_held(&css_set_lock);
+
+	list_for_each_entry(link, &cgrp->cset_links, cset_link)
+		count += link->cset->nr_tasks;
+
+	return count;
+}
+
+/**
+ * cgroup_task_count - count the number of tasks in a cgroup.
+ * @cgrp: the cgroup in question
+ */
+int cgroup_task_count(const struct cgroup *cgrp)
+{
+	int count;
+
+	spin_lock_irq(&css_set_lock);
+	count = __cgroup_task_count(cgrp);
+	spin_unlock_irq(&css_set_lock);
+
+	return count;
+}
+
 struct cgroup_subsys_state *of_css(struct kernfs_open_file *of)
 {
 	struct cgroup *cgrp = of->kn->parent->priv;
@@ -783,6 +816,8 @@ static void cgroup_update_populated(struct cgroup *cgrp, bool populated)
 			break;
 
 		cgroup1_check_for_release(cgrp);
+		TRACE_CGROUP_PATH(notify_populated, cgrp,
+				  cgroup_is_populated(cgrp));
 		cgroup_file_notify(&cgrp->events_file);
 
 		child = cgrp;
@@ -2402,8 +2437,15 @@ static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx)
 			get_css_set(to_cset);
 			to_cset->nr_tasks++;
 			css_set_move_task(task, from_cset, to_cset, true);
-			put_css_set_locked(from_cset);
 			from_cset->nr_tasks--;
+			/*
+			 * If the source or destination cgroup is frozen,
+			 * the task might require to change its state.
+			 */
+			cgroup_freezer_migrate_task(task, from_cset->dfl_cgrp,
+						    to_cset->dfl_cgrp);
+			put_css_set_locked(from_cset);
+
 		}
 	}
 	spin_unlock_irq(&css_set_lock);
@@ -2602,7 +2644,7 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx)
 
 		dst_cset = find_css_set(src_cset, src_cset->mg_dst_cgrp);
 		if (!dst_cset)
-			goto err;
+			return -ENOMEM;
 
 		WARN_ON_ONCE(src_cset->mg_dst_cset || dst_cset->mg_dst_cset);
 
@@ -2634,9 +2676,6 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx)
 	}
 
 	return 0;
-err:
-	cgroup_migrate_finish(mgctx);
-	return -ENOMEM;
 }
 
 /**
@@ -3447,8 +3486,11 @@ static ssize_t cgroup_max_depth_write(struct kernfs_open_file *of,
 
 static int cgroup_events_show(struct seq_file *seq, void *v)
 {
-	seq_printf(seq, "populated %d\n",
-		   cgroup_is_populated(seq_css(seq)->cgroup));
+	struct cgroup *cgrp = seq_css(seq)->cgroup;
+
+	seq_printf(seq, "populated %d\n", cgroup_is_populated(cgrp));
+	seq_printf(seq, "frozen %d\n", test_bit(CGRP_FROZEN, &cgrp->flags));
+
 	return 0;
 }
 
@@ -3510,6 +3552,40 @@ static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v)
 }
 #endif
 
+static int cgroup_freeze_show(struct seq_file *seq, void *v)
+{
+	struct cgroup *cgrp = seq_css(seq)->cgroup;
+
+	seq_printf(seq, "%d\n", cgrp->freezer.freeze);
+
+	return 0;
+}
+
+static ssize_t cgroup_freeze_write(struct kernfs_open_file *of,
+				   char *buf, size_t nbytes, loff_t off)
+{
+	struct cgroup *cgrp;
+	ssize_t ret;
+	int freeze;
+
+	ret = kstrtoint(strstrip(buf), 0, &freeze);
+	if (ret)
+		return ret;
+
+	if (freeze < 0 || freeze > 1)
+		return -ERANGE;
+
+	cgrp = cgroup_kn_lock_live(of->kn, false);
+	if (!cgrp)
+		return -ENOENT;
+
+	cgroup_freeze(cgrp, freeze);
+
+	cgroup_kn_unlock(of->kn);
+
+	return nbytes;
+}
+
 static int cgroup_file_open(struct kernfs_open_file *of)
 {
 	struct cftype *cft = of->kn->priv;
@@ -4654,6 +4730,12 @@ static struct cftype cgroup_base_files[] = {
 		.seq_show = cgroup_stat_show,
 	},
 	{
+		.name = "cgroup.freeze",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = cgroup_freeze_show,
+		.write = cgroup_freeze_write,
+	},
+	{
 		.name = "cpu.stat",
 		.flags = CFTYPE_NOT_ON_ROOT,
 		.seq_show = cpu_stat_show,
@@ -4781,9 +4863,11 @@ static void css_release_work_fn(struct work_struct *work)
 		if (cgroup_on_dfl(cgrp))
 			cgroup_rstat_flush(cgrp);
 
+		spin_lock_irq(&css_set_lock);
 		for (tcgrp = cgroup_parent(cgrp); tcgrp;
 		     tcgrp = cgroup_parent(tcgrp))
 			tcgrp->nr_dying_descendants--;
+		spin_unlock_irq(&css_set_lock);
 
 		cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
 		cgrp->id = -1;
@@ -5001,12 +5085,31 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
 	if (ret)
 		goto out_psi_free;
 
+	/*
+	 * New cgroup inherits effective freeze counter, and
+	 * if the parent has to be frozen, the child has too.
+	 */
+	cgrp->freezer.e_freeze = parent->freezer.e_freeze;
+	if (cgrp->freezer.e_freeze)
+		set_bit(CGRP_FROZEN, &cgrp->flags);
+
+	spin_lock_irq(&css_set_lock);
 	for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) {
 		cgrp->ancestor_ids[tcgrp->level] = tcgrp->id;
 
-		if (tcgrp != cgrp)
+		if (tcgrp != cgrp) {
 			tcgrp->nr_descendants++;
+
+			/*
+			 * If the new cgroup is frozen, all ancestor cgroups
+			 * get a new frozen descendant, but their state can't
+			 * change because of this.
+			 */
+			if (cgrp->freezer.e_freeze)
+				tcgrp->freezer.nr_frozen_descendants++;
+		}
 	}
+	spin_unlock_irq(&css_set_lock);
 
 	if (notify_on_release(parent))
 		set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
@@ -5291,10 +5394,18 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	if (parent && cgroup_is_threaded(cgrp))
 		parent->nr_threaded_children--;
 
+	spin_lock_irq(&css_set_lock);
 	for (tcgrp = cgroup_parent(cgrp); tcgrp; tcgrp = cgroup_parent(tcgrp)) {
 		tcgrp->nr_descendants--;
 		tcgrp->nr_dying_descendants++;
+		/*
+		 * If the dying cgroup is frozen, decrease frozen descendants
+		 * counters of ancestor cgroups.
+		 */
+		if (test_bit(CGRP_FROZEN, &cgrp->flags))
+			tcgrp->freezer.nr_frozen_descendants--;
 	}
+	spin_unlock_irq(&css_set_lock);
 
 	cgroup1_check_for_release(parent);
 
@@ -5746,6 +5857,26 @@ void cgroup_post_fork(struct task_struct *child)
 			cset->nr_tasks++;
 			css_set_move_task(child, NULL, cset, false);
 		}
+
+		/*
+		 * If the cgroup has to be frozen, the new task has too.
+		 * Let's set the JOBCTL_TRAP_FREEZE jobctl bit to get
+		 * the task into the frozen state.
+		 */
+		if (unlikely(cgroup_task_freeze(child))) {
+			spin_lock(&child->sighand->siglock);
+			WARN_ON_ONCE(child->frozen);
+			child->jobctl |= JOBCTL_TRAP_FREEZE;
+			spin_unlock(&child->sighand->siglock);
+
+			/*
+			 * Calling cgroup_update_frozen() isn't required here,
+			 * because it will be called anyway a bit later
+			 * from do_freezer_trap(). So we avoid cgroup's
+			 * transient switch from the frozen state and back.
+			 */
+		}
+
 		spin_unlock_irq(&css_set_lock);
 	}
 
@@ -5794,6 +5925,11 @@ void cgroup_exit(struct task_struct *tsk)
 		spin_lock_irq(&css_set_lock);
 		css_set_move_task(tsk, cset, NULL, false);
 		cset->nr_tasks--;
+
+		WARN_ON_ONCE(cgroup_task_frozen(tsk));
+		if (unlikely(cgroup_task_freeze(tsk)))
+			cgroup_update_frozen(task_dfl_cgroup(tsk));
+
 		spin_unlock_irq(&css_set_lock);
 	} else {
 		get_css_set(cset);
diff --git a/kernel/cgroup/debug.c b/kernel/cgroup/debug.c
index 5f1b87330bee..80aa3f027ac3 100644
--- a/kernel/cgroup/debug.c
+++ b/kernel/cgroup/debug.c
@@ -64,8 +64,8 @@ static int current_css_set_read(struct seq_file *seq, void *v)
 		css = cset->subsys[ss->id];
 		if (!css)
 			continue;
-		seq_printf(seq, "%2d: %-4s\t- %lx[%d]\n", ss->id, ss->name,
-			  (unsigned long)css, css->id);
+		seq_printf(seq, "%2d: %-4s\t- %p[%d]\n", ss->id, ss->name,
+			  css, css->id);
 	}
 	rcu_read_unlock();
 	spin_unlock_irq(&css_set_lock);
@@ -224,8 +224,8 @@ static int cgroup_subsys_states_read(struct seq_file *seq, void *v)
 		if (css->parent)
 			snprintf(pbuf, sizeof(pbuf) - 1, " P=%d",
 				 css->parent->id);
-		seq_printf(seq, "%2d: %-4s\t- %lx[%d] %d%s\n", ss->id, ss->name,
-			  (unsigned long)css, css->id,
+		seq_printf(seq, "%2d: %-4s\t- %p[%d] %d%s\n", ss->id, ss->name,
+			  css, css->id,
 			  atomic_read(&css->online_cnt), pbuf);
 	}
 
diff --git a/kernel/cgroup/freezer.c b/kernel/cgroup/freezer.c
index 08236798d173..8cf010680678 100644
--- a/kernel/cgroup/freezer.c
+++ b/kernel/cgroup/freezer.c
@@ -1,481 +1,314 @@
-/*
- * cgroup_freezer.c -  control group freezer subsystem
- *
- * Copyright IBM Corporation, 2007
- *
- * Author : Cedric Le Goater <clg@fr.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2.1 of the GNU Lesser General Public License
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- */
-
-#include <linux/export.h>
-#include <linux/slab.h>
+//SPDX-License-Identifier: GPL-2.0
 #include <linux/cgroup.h>
-#include <linux/fs.h>
-#include <linux/uaccess.h>
-#include <linux/freezer.h>
-#include <linux/seq_file.h>
-#include <linux/mutex.h>
-
-/*
- * A cgroup is freezing if any FREEZING flags are set.  FREEZING_SELF is
- * set if "FROZEN" is written to freezer.state cgroupfs file, and cleared
- * for "THAWED".  FREEZING_PARENT is set if the parent freezer is FREEZING
- * for whatever reason.  IOW, a cgroup has FREEZING_PARENT set if one of
- * its ancestors has FREEZING_SELF set.
- */
-enum freezer_state_flags {
-	CGROUP_FREEZER_ONLINE	= (1 << 0), /* freezer is fully online */
-	CGROUP_FREEZING_SELF	= (1 << 1), /* this freezer is freezing */
-	CGROUP_FREEZING_PARENT	= (1 << 2), /* the parent freezer is freezing */
-	CGROUP_FROZEN		= (1 << 3), /* this and its descendants frozen */
+#include <linux/sched.h>
+#include <linux/sched/task.h>
+#include <linux/sched/signal.h>
 
-	/* mask for all FREEZING flags */
-	CGROUP_FREEZING		= CGROUP_FREEZING_SELF | CGROUP_FREEZING_PARENT,
-};
+#include "cgroup-internal.h"
 
-struct freezer {
-	struct cgroup_subsys_state	css;
-	unsigned int			state;
-};
+#include <trace/events/cgroup.h>
 
-static DEFINE_MUTEX(freezer_mutex);
-
-static inline struct freezer *css_freezer(struct cgroup_subsys_state *css)
-{
-	return css ? container_of(css, struct freezer, css) : NULL;
-}
-
-static inline struct freezer *task_freezer(struct task_struct *task)
+/*
+ * Propagate the cgroup frozen state upwards by the cgroup tree.
+ */
+static void cgroup_propagate_frozen(struct cgroup *cgrp, bool frozen)
 {
-	return css_freezer(task_css(task, freezer_cgrp_id));
-}
+	int desc = 1;
 
-static struct freezer *parent_freezer(struct freezer *freezer)
-{
-	return css_freezer(freezer->css.parent);
+	/*
+	 * If the new state is frozen, some freezing ancestor cgroups may change
+	 * their state too, depending on if all their descendants are frozen.
+	 *
+	 * Otherwise, all ancestor cgroups are forced into the non-frozen state.
+	 */
+	while ((cgrp = cgroup_parent(cgrp))) {
+		if (frozen) {
+			cgrp->freezer.nr_frozen_descendants += desc;
+			if (!test_bit(CGRP_FROZEN, &cgrp->flags) &&
+			    test_bit(CGRP_FREEZE, &cgrp->flags) &&
+			    cgrp->freezer.nr_frozen_descendants ==
+			    cgrp->nr_descendants) {
+				set_bit(CGRP_FROZEN, &cgrp->flags);
+				cgroup_file_notify(&cgrp->events_file);
+				TRACE_CGROUP_PATH(notify_frozen, cgrp, 1);
+				desc++;
+			}
+		} else {
+			cgrp->freezer.nr_frozen_descendants -= desc;
+			if (test_bit(CGRP_FROZEN, &cgrp->flags)) {
+				clear_bit(CGRP_FROZEN, &cgrp->flags);
+				cgroup_file_notify(&cgrp->events_file);
+				TRACE_CGROUP_PATH(notify_frozen, cgrp, 0);
+				desc++;
+			}
+		}
+	}
 }
 
-bool cgroup_freezing(struct task_struct *task)
+/*
+ * Revisit the cgroup frozen state.
+ * Checks if the cgroup is really frozen and perform all state transitions.
+ */
+void cgroup_update_frozen(struct cgroup *cgrp)
 {
-	bool ret;
+	bool frozen;
 
-	rcu_read_lock();
-	ret = task_freezer(task)->state & CGROUP_FREEZING;
-	rcu_read_unlock();
+	lockdep_assert_held(&css_set_lock);
 
-	return ret;
-}
+	/*
+	 * If the cgroup has to be frozen (CGRP_FREEZE bit set),
+	 * and all tasks are frozen and/or stopped, let's consider
+	 * the cgroup frozen. Otherwise it's not frozen.
+	 */
+	frozen = test_bit(CGRP_FREEZE, &cgrp->flags) &&
+		cgrp->freezer.nr_frozen_tasks == __cgroup_task_count(cgrp);
 
-static const char *freezer_state_strs(unsigned int state)
-{
-	if (state & CGROUP_FROZEN)
-		return "FROZEN";
-	if (state & CGROUP_FREEZING)
-		return "FREEZING";
-	return "THAWED";
-};
+	if (frozen) {
+		/* Already there? */
+		if (test_bit(CGRP_FROZEN, &cgrp->flags))
+			return;
 
-static struct cgroup_subsys_state *
-freezer_css_alloc(struct cgroup_subsys_state *parent_css)
-{
-	struct freezer *freezer;
+		set_bit(CGRP_FROZEN, &cgrp->flags);
+	} else {
+		/* Already there? */
+		if (!test_bit(CGRP_FROZEN, &cgrp->flags))
+			return;
 
-	freezer = kzalloc(sizeof(struct freezer), GFP_KERNEL);
-	if (!freezer)
-		return ERR_PTR(-ENOMEM);
+		clear_bit(CGRP_FROZEN, &cgrp->flags);
+	}
+	cgroup_file_notify(&cgrp->events_file);
+	TRACE_CGROUP_PATH(notify_frozen, cgrp, frozen);
 
-	return &freezer->css;
+	/* Update the state of ancestor cgroups. */
+	cgroup_propagate_frozen(cgrp, frozen);
 }
 
-/**
- * freezer_css_online - commit creation of a freezer css
- * @css: css being created
- *
- * We're committing to creation of @css.  Mark it online and inherit
- * parent's freezing state while holding both parent's and our
- * freezer->lock.
+/*
+ * Increment cgroup's nr_frozen_tasks.
  */
-static int freezer_css_online(struct cgroup_subsys_state *css)
+static void cgroup_inc_frozen_cnt(struct cgroup *cgrp)
 {
-	struct freezer *freezer = css_freezer(css);
-	struct freezer *parent = parent_freezer(freezer);
-
-	mutex_lock(&freezer_mutex);
-
-	freezer->state |= CGROUP_FREEZER_ONLINE;
-
-	if (parent && (parent->state & CGROUP_FREEZING)) {
-		freezer->state |= CGROUP_FREEZING_PARENT | CGROUP_FROZEN;
-		atomic_inc(&system_freezing_cnt);
-	}
-
-	mutex_unlock(&freezer_mutex);
-	return 0;
+	cgrp->freezer.nr_frozen_tasks++;
 }
 
-/**
- * freezer_css_offline - initiate destruction of a freezer css
- * @css: css being destroyed
- *
- * @css is going away.  Mark it dead and decrement system_freezing_count if
- * it was holding one.
+/*
+ * Decrement cgroup's nr_frozen_tasks.
  */
-static void freezer_css_offline(struct cgroup_subsys_state *css)
+static void cgroup_dec_frozen_cnt(struct cgroup *cgrp)
 {
-	struct freezer *freezer = css_freezer(css);
-
-	mutex_lock(&freezer_mutex);
-
-	if (freezer->state & CGROUP_FREEZING)
-		atomic_dec(&system_freezing_cnt);
-
-	freezer->state = 0;
-
-	mutex_unlock(&freezer_mutex);
+	cgrp->freezer.nr_frozen_tasks--;
+	WARN_ON_ONCE(cgrp->freezer.nr_frozen_tasks < 0);
 }
 
-static void freezer_css_free(struct cgroup_subsys_state *css)
+/*
+ * Enter frozen/stopped state, if not yet there. Update cgroup's counters,
+ * and revisit the state of the cgroup, if necessary.
+ */
+void cgroup_enter_frozen(void)
 {
-	kfree(css_freezer(css));
+	struct cgroup *cgrp;
+
+	if (current->frozen)
+		return;
+
+	spin_lock_irq(&css_set_lock);
+	current->frozen = true;
+	cgrp = task_dfl_cgroup(current);
+	cgroup_inc_frozen_cnt(cgrp);
+	cgroup_update_frozen(cgrp);
+	spin_unlock_irq(&css_set_lock);
 }
 
 /*
- * Tasks can be migrated into a different freezer anytime regardless of its
- * current state.  freezer_attach() is responsible for making new tasks
- * conform to the current state.
+ * Conditionally leave frozen/stopped state. Update cgroup's counters,
+ * and revisit the state of the cgroup, if necessary.
  *
- * Freezer state changes and task migration are synchronized via
- * @freezer->lock.  freezer_attach() makes the new tasks conform to the
- * current state and all following state changes can see the new tasks.
+ * If always_leave is not set, and the cgroup is freezing,
+ * we're racing with the cgroup freezing. In this case, we don't
+ * drop the frozen counter to avoid a transient switch to
+ * the unfrozen state.
  */
-static void freezer_attach(struct cgroup_taskset *tset)
+void cgroup_leave_frozen(bool always_leave)
 {
-	struct task_struct *task;
-	struct cgroup_subsys_state *new_css;
-
-	mutex_lock(&freezer_mutex);
+	struct cgroup *cgrp;
 
-	/*
-	 * Make the new tasks conform to the current state of @new_css.
-	 * For simplicity, when migrating any task to a FROZEN cgroup, we
-	 * revert it to FREEZING and let update_if_frozen() determine the
-	 * correct state later.
-	 *
-	 * Tasks in @tset are on @new_css but may not conform to its
-	 * current state before executing the following - !frozen tasks may
-	 * be visible in a FROZEN cgroup and frozen tasks in a THAWED one.
-	 */
-	cgroup_taskset_for_each(task, new_css, tset) {
-		struct freezer *freezer = css_freezer(new_css);
-
-		if (!(freezer->state & CGROUP_FREEZING)) {
-			__thaw_task(task);
-		} else {
-			freeze_task(task);
-			/* clear FROZEN and propagate upwards */
-			while (freezer && (freezer->state & CGROUP_FROZEN)) {
-				freezer->state &= ~CGROUP_FROZEN;
-				freezer = parent_freezer(freezer);
-			}
-		}
+	spin_lock_irq(&css_set_lock);
+	cgrp = task_dfl_cgroup(current);
+	if (always_leave || !test_bit(CGRP_FREEZE, &cgrp->flags)) {
+		cgroup_dec_frozen_cnt(cgrp);
+		cgroup_update_frozen(cgrp);
+		WARN_ON_ONCE(!current->frozen);
+		current->frozen = false;
+	} else if (!(current->jobctl & JOBCTL_TRAP_FREEZE)) {
+		spin_lock(&current->sighand->siglock);
+		current->jobctl |= JOBCTL_TRAP_FREEZE;
+		set_thread_flag(TIF_SIGPENDING);
+		spin_unlock(&current->sighand->siglock);
 	}
-
-	mutex_unlock(&freezer_mutex);
+	spin_unlock_irq(&css_set_lock);
 }
 
-/**
- * freezer_fork - cgroup post fork callback
- * @task: a task which has just been forked
- *
- * @task has just been created and should conform to the current state of
- * the cgroup_freezer it belongs to.  This function may race against
- * freezer_attach().  Losing to freezer_attach() means that we don't have
- * to do anything as freezer_attach() will put @task into the appropriate
- * state.
+/*
+ * Freeze or unfreeze the task by setting or clearing the JOBCTL_TRAP_FREEZE
+ * jobctl bit.
  */
-static void freezer_fork(struct task_struct *task)
+static void cgroup_freeze_task(struct task_struct *task, bool freeze)
 {
-	struct freezer *freezer;
+	unsigned long flags;
 
-	/*
-	 * The root cgroup is non-freezable, so we can skip locking the
-	 * freezer.  This is safe regardless of race with task migration.
-	 * If we didn't race or won, skipping is obviously the right thing
-	 * to do.  If we lost and root is the new cgroup, noop is still the
-	 * right thing to do.
-	 */
-	if (task_css_is_root(task, freezer_cgrp_id))
+	/* If the task is about to die, don't bother with freezing it. */
+	if (!lock_task_sighand(task, &flags))
 		return;
 
-	mutex_lock(&freezer_mutex);
-	rcu_read_lock();
-
-	freezer = task_freezer(task);
-	if (freezer->state & CGROUP_FREEZING)
-		freeze_task(task);
+	if (freeze) {
+		task->jobctl |= JOBCTL_TRAP_FREEZE;
+		signal_wake_up(task, false);
+	} else {
+		task->jobctl &= ~JOBCTL_TRAP_FREEZE;
+		wake_up_process(task);
+	}
 
-	rcu_read_unlock();
-	mutex_unlock(&freezer_mutex);
+	unlock_task_sighand(task, &flags);
 }
 
-/**
- * update_if_frozen - update whether a cgroup finished freezing
- * @css: css of interest
- *
- * Once FREEZING is initiated, transition to FROZEN is lazily updated by
- * calling this function.  If the current state is FREEZING but not FROZEN,
- * this function checks whether all tasks of this cgroup and the descendant
- * cgroups finished freezing and, if so, sets FROZEN.
- *
- * The caller is responsible for grabbing RCU read lock and calling
- * update_if_frozen() on all descendants prior to invoking this function.
- *
- * Task states and freezer state might disagree while tasks are being
- * migrated into or out of @css, so we can't verify task states against
- * @freezer state here.  See freezer_attach() for details.
+/*
+ * Freeze or unfreeze all tasks in the given cgroup.
  */
-static void update_if_frozen(struct cgroup_subsys_state *css)
+static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze)
 {
-	struct freezer *freezer = css_freezer(css);
-	struct cgroup_subsys_state *pos;
 	struct css_task_iter it;
 	struct task_struct *task;
 
-	lockdep_assert_held(&freezer_mutex);
-
-	if (!(freezer->state & CGROUP_FREEZING) ||
-	    (freezer->state & CGROUP_FROZEN))
-		return;
-
-	/* are all (live) children frozen? */
-	rcu_read_lock();
-	css_for_each_child(pos, css) {
-		struct freezer *child = css_freezer(pos);
+	lockdep_assert_held(&cgroup_mutex);
 
-		if ((child->state & CGROUP_FREEZER_ONLINE) &&
-		    !(child->state & CGROUP_FROZEN)) {
-			rcu_read_unlock();
-			return;
-		}
-	}
-	rcu_read_unlock();
+	spin_lock_irq(&css_set_lock);
+	if (freeze)
+		set_bit(CGRP_FREEZE, &cgrp->flags);
+	else
+		clear_bit(CGRP_FREEZE, &cgrp->flags);
+	spin_unlock_irq(&css_set_lock);
 
-	/* are all tasks frozen? */
-	css_task_iter_start(css, 0, &it);
+	if (freeze)
+		TRACE_CGROUP_PATH(freeze, cgrp);
+	else
+		TRACE_CGROUP_PATH(unfreeze, cgrp);
 
+	css_task_iter_start(&cgrp->self, 0, &it);
 	while ((task = css_task_iter_next(&it))) {
-		if (freezing(task)) {
-			/*
-			 * freezer_should_skip() indicates that the task
-			 * should be skipped when determining freezing
-			 * completion.  Consider it frozen in addition to
-			 * the usual frozen condition.
-			 */
-			if (!frozen(task) && !freezer_should_skip(task))
-				goto out_iter_end;
-		}
-	}
-
-	freezer->state |= CGROUP_FROZEN;
-out_iter_end:
-	css_task_iter_end(&it);
-}
-
-static int freezer_read(struct seq_file *m, void *v)
-{
-	struct cgroup_subsys_state *css = seq_css(m), *pos;
-
-	mutex_lock(&freezer_mutex);
-	rcu_read_lock();
-
-	/* update states bottom-up */
-	css_for_each_descendant_post(pos, css) {
-		if (!css_tryget_online(pos))
+		/*
+		 * Ignore kernel threads here. Freezing cgroups containing
+		 * kthreads isn't supported.
+		 */
+		if (task->flags & PF_KTHREAD)
 			continue;
-		rcu_read_unlock();
-
-		update_if_frozen(pos);
-
-		rcu_read_lock();
-		css_put(pos);
+		cgroup_freeze_task(task, freeze);
 	}
-
-	rcu_read_unlock();
-	mutex_unlock(&freezer_mutex);
-
-	seq_puts(m, freezer_state_strs(css_freezer(css)->state));
-	seq_putc(m, '\n');
-	return 0;
-}
-
-static void freeze_cgroup(struct freezer *freezer)
-{
-	struct css_task_iter it;
-	struct task_struct *task;
-
-	css_task_iter_start(&freezer->css, 0, &it);
-	while ((task = css_task_iter_next(&it)))
-		freeze_task(task);
 	css_task_iter_end(&it);
-}
-
-static void unfreeze_cgroup(struct freezer *freezer)
-{
-	struct css_task_iter it;
-	struct task_struct *task;
 
-	css_task_iter_start(&freezer->css, 0, &it);
-	while ((task = css_task_iter_next(&it)))
-		__thaw_task(task);
-	css_task_iter_end(&it);
+	/*
+	 * Cgroup state should be revisited here to cover empty leaf cgroups
+	 * and cgroups which descendants are already in the desired state.
+	 */
+	spin_lock_irq(&css_set_lock);
+	if (cgrp->nr_descendants == cgrp->freezer.nr_frozen_descendants)
+		cgroup_update_frozen(cgrp);
+	spin_unlock_irq(&css_set_lock);
 }
 
-/**
- * freezer_apply_state - apply state change to a single cgroup_freezer
- * @freezer: freezer to apply state change to
- * @freeze: whether to freeze or unfreeze
- * @state: CGROUP_FREEZING_* flag to set or clear
- *
- * Set or clear @state on @cgroup according to @freeze, and perform
- * freezing or thawing as necessary.
+/*
+ * Adjust the task state (freeze or unfreeze) and revisit the state of
+ * source and destination cgroups.
  */
-static void freezer_apply_state(struct freezer *freezer, bool freeze,
-				unsigned int state)
+void cgroup_freezer_migrate_task(struct task_struct *task,
+				 struct cgroup *src, struct cgroup *dst)
 {
-	/* also synchronizes against task migration, see freezer_attach() */
-	lockdep_assert_held(&freezer_mutex);
+	lockdep_assert_held(&css_set_lock);
 
-	if (!(freezer->state & CGROUP_FREEZER_ONLINE))
+	/*
+	 * Kernel threads are not supposed to be frozen at all.
+	 */
+	if (task->flags & PF_KTHREAD)
 		return;
 
-	if (freeze) {
-		if (!(freezer->state & CGROUP_FREEZING))
-			atomic_inc(&system_freezing_cnt);
-		freezer->state |= state;
-		freeze_cgroup(freezer);
-	} else {
-		bool was_freezing = freezer->state & CGROUP_FREEZING;
-
-		freezer->state &= ~state;
-
-		if (!(freezer->state & CGROUP_FREEZING)) {
-			if (was_freezing)
-				atomic_dec(&system_freezing_cnt);
-			freezer->state &= ~CGROUP_FROZEN;
-			unfreeze_cgroup(freezer);
-		}
+	/*
+	 * Adjust counters of freezing and frozen tasks.
+	 * Note, that if the task is frozen, but the destination cgroup is not
+	 * frozen, we bump both counters to keep them balanced.
+	 */
+	if (task->frozen) {
+		cgroup_inc_frozen_cnt(dst);
+		cgroup_dec_frozen_cnt(src);
 	}
-}
-
-/**
- * freezer_change_state - change the freezing state of a cgroup_freezer
- * @freezer: freezer of interest
- * @freeze: whether to freeze or thaw
- *
- * Freeze or thaw @freezer according to @freeze.  The operations are
- * recursive - all descendants of @freezer will be affected.
- */
-static void freezer_change_state(struct freezer *freezer, bool freeze)
-{
-	struct cgroup_subsys_state *pos;
+	cgroup_update_frozen(dst);
+	cgroup_update_frozen(src);
 
 	/*
-	 * Update all its descendants in pre-order traversal.  Each
-	 * descendant will try to inherit its parent's FREEZING state as
-	 * CGROUP_FREEZING_PARENT.
+	 * Force the task to the desired state.
 	 */
-	mutex_lock(&freezer_mutex);
-	rcu_read_lock();
-	css_for_each_descendant_pre(pos, &freezer->css) {
-		struct freezer *pos_f = css_freezer(pos);
-		struct freezer *parent = parent_freezer(pos_f);
-
-		if (!css_tryget_online(pos))
-			continue;
-		rcu_read_unlock();
-
-		if (pos_f == freezer)
-			freezer_apply_state(pos_f, freeze,
-					    CGROUP_FREEZING_SELF);
-		else
-			freezer_apply_state(pos_f,
-					    parent->state & CGROUP_FREEZING,
-					    CGROUP_FREEZING_PARENT);
-
-		rcu_read_lock();
-		css_put(pos);
-	}
-	rcu_read_unlock();
-	mutex_unlock(&freezer_mutex);
+	cgroup_freeze_task(task, test_bit(CGRP_FREEZE, &dst->flags));
 }
 
-static ssize_t freezer_write(struct kernfs_open_file *of,
-			     char *buf, size_t nbytes, loff_t off)
+void cgroup_freeze(struct cgroup *cgrp, bool freeze)
 {
-	bool freeze;
+	struct cgroup_subsys_state *css;
+	struct cgroup *dsct;
+	bool applied = false;
 
-	buf = strstrip(buf);
+	lockdep_assert_held(&cgroup_mutex);
 
-	if (strcmp(buf, freezer_state_strs(0)) == 0)
-		freeze = false;
-	else if (strcmp(buf, freezer_state_strs(CGROUP_FROZEN)) == 0)
-		freeze = true;
-	else
-		return -EINVAL;
+	/*
+	 * Nothing changed? Just exit.
+	 */
+	if (cgrp->freezer.freeze == freeze)
+		return;
 
-	freezer_change_state(css_freezer(of_css(of)), freeze);
-	return nbytes;
-}
+	cgrp->freezer.freeze = freeze;
 
-static u64 freezer_self_freezing_read(struct cgroup_subsys_state *css,
-				      struct cftype *cft)
-{
-	struct freezer *freezer = css_freezer(css);
+	/*
+	 * Propagate changes downwards the cgroup tree.
+	 */
+	css_for_each_descendant_pre(css, &cgrp->self) {
+		dsct = css->cgroup;
 
-	return (bool)(freezer->state & CGROUP_FREEZING_SELF);
-}
+		if (cgroup_is_dead(dsct))
+			continue;
 
-static u64 freezer_parent_freezing_read(struct cgroup_subsys_state *css,
-					struct cftype *cft)
-{
-	struct freezer *freezer = css_freezer(css);
+		if (freeze) {
+			dsct->freezer.e_freeze++;
+			/*
+			 * Already frozen because of ancestor's settings?
+			 */
+			if (dsct->freezer.e_freeze > 1)
+				continue;
+		} else {
+			dsct->freezer.e_freeze--;
+			/*
+			 * Still frozen because of ancestor's settings?
+			 */
+			if (dsct->freezer.e_freeze > 0)
+				continue;
 
-	return (bool)(freezer->state & CGROUP_FREEZING_PARENT);
-}
+			WARN_ON_ONCE(dsct->freezer.e_freeze < 0);
+		}
 
-static struct cftype files[] = {
-	{
-		.name = "state",
-		.flags = CFTYPE_NOT_ON_ROOT,
-		.seq_show = freezer_read,
-		.write = freezer_write,
-	},
-	{
-		.name = "self_freezing",
-		.flags = CFTYPE_NOT_ON_ROOT,
-		.read_u64 = freezer_self_freezing_read,
-	},
-	{
-		.name = "parent_freezing",
-		.flags = CFTYPE_NOT_ON_ROOT,
-		.read_u64 = freezer_parent_freezing_read,
-	},
-	{ }	/* terminate */
-};
+		/*
+		 * Do change actual state: freeze or unfreeze.
+		 */
+		cgroup_do_freeze(dsct, freeze);
+		applied = true;
+	}
 
-struct cgroup_subsys freezer_cgrp_subsys = {
-	.css_alloc	= freezer_css_alloc,
-	.css_online	= freezer_css_online,
-	.css_offline	= freezer_css_offline,
-	.css_free	= freezer_css_free,
-	.attach		= freezer_attach,
-	.fork		= freezer_fork,
-	.legacy_cftypes	= files,
-};
+	/*
+	 * Even if the actual state hasn't changed, let's notify a user.
+	 * The state can be enforced by an ancestor cgroup: the cgroup
+	 * can already be in the desired state or it can be locked in the
+	 * opposite state, so that the transition will never happen.
+	 * In both cases it's better to notify a user, that there is
+	 * nothing to wait for.
+	 */
+	if (!applied) {
+		TRACE_CGROUP_PATH(notify_frozen, cgrp,
+				  test_bit(CGRP_FROZEN, &cgrp->flags));
+		cgroup_file_notify(&cgrp->events_file);
+	}
+}
diff --git a/kernel/cgroup/legacy_freezer.c b/kernel/cgroup/legacy_freezer.c
new file mode 100644
index 000000000000..08236798d173
--- /dev/null
+++ b/kernel/cgroup/legacy_freezer.c
@@ -0,0 +1,481 @@
+/*
+ * cgroup_freezer.c -  control group freezer subsystem
+ *
+ * Copyright IBM Corporation, 2007
+ *
+ * Author : Cedric Le Goater <clg@fr.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/cgroup.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/freezer.h>
+#include <linux/seq_file.h>
+#include <linux/mutex.h>
+
+/*
+ * A cgroup is freezing if any FREEZING flags are set.  FREEZING_SELF is
+ * set if "FROZEN" is written to freezer.state cgroupfs file, and cleared
+ * for "THAWED".  FREEZING_PARENT is set if the parent freezer is FREEZING
+ * for whatever reason.  IOW, a cgroup has FREEZING_PARENT set if one of
+ * its ancestors has FREEZING_SELF set.
+ */
+enum freezer_state_flags {
+	CGROUP_FREEZER_ONLINE	= (1 << 0), /* freezer is fully online */
+	CGROUP_FREEZING_SELF	= (1 << 1), /* this freezer is freezing */
+	CGROUP_FREEZING_PARENT	= (1 << 2), /* the parent freezer is freezing */
+	CGROUP_FROZEN		= (1 << 3), /* this and its descendants frozen */
+
+	/* mask for all FREEZING flags */
+	CGROUP_FREEZING		= CGROUP_FREEZING_SELF | CGROUP_FREEZING_PARENT,
+};
+
+struct freezer {
+	struct cgroup_subsys_state	css;
+	unsigned int			state;
+};
+
+static DEFINE_MUTEX(freezer_mutex);
+
+static inline struct freezer *css_freezer(struct cgroup_subsys_state *css)
+{
+	return css ? container_of(css, struct freezer, css) : NULL;
+}
+
+static inline struct freezer *task_freezer(struct task_struct *task)
+{
+	return css_freezer(task_css(task, freezer_cgrp_id));
+}
+
+static struct freezer *parent_freezer(struct freezer *freezer)
+{
+	return css_freezer(freezer->css.parent);
+}
+
+bool cgroup_freezing(struct task_struct *task)
+{
+	bool ret;
+
+	rcu_read_lock();
+	ret = task_freezer(task)->state & CGROUP_FREEZING;
+	rcu_read_unlock();
+
+	return ret;
+}
+
+static const char *freezer_state_strs(unsigned int state)
+{
+	if (state & CGROUP_FROZEN)
+		return "FROZEN";
+	if (state & CGROUP_FREEZING)
+		return "FREEZING";
+	return "THAWED";
+};
+
+static struct cgroup_subsys_state *
+freezer_css_alloc(struct cgroup_subsys_state *parent_css)
+{
+	struct freezer *freezer;
+
+	freezer = kzalloc(sizeof(struct freezer), GFP_KERNEL);
+	if (!freezer)
+		return ERR_PTR(-ENOMEM);
+
+	return &freezer->css;
+}
+
+/**
+ * freezer_css_online - commit creation of a freezer css
+ * @css: css being created
+ *
+ * We're committing to creation of @css.  Mark it online and inherit
+ * parent's freezing state while holding both parent's and our
+ * freezer->lock.
+ */
+static int freezer_css_online(struct cgroup_subsys_state *css)
+{
+	struct freezer *freezer = css_freezer(css);
+	struct freezer *parent = parent_freezer(freezer);
+
+	mutex_lock(&freezer_mutex);
+
+	freezer->state |= CGROUP_FREEZER_ONLINE;
+
+	if (parent && (parent->state & CGROUP_FREEZING)) {
+		freezer->state |= CGROUP_FREEZING_PARENT | CGROUP_FROZEN;
+		atomic_inc(&system_freezing_cnt);
+	}
+
+	mutex_unlock(&freezer_mutex);
+	return 0;
+}
+
+/**
+ * freezer_css_offline - initiate destruction of a freezer css
+ * @css: css being destroyed
+ *
+ * @css is going away.  Mark it dead and decrement system_freezing_count if
+ * it was holding one.
+ */
+static void freezer_css_offline(struct cgroup_subsys_state *css)
+{
+	struct freezer *freezer = css_freezer(css);
+
+	mutex_lock(&freezer_mutex);
+
+	if (freezer->state & CGROUP_FREEZING)
+		atomic_dec(&system_freezing_cnt);
+
+	freezer->state = 0;
+
+	mutex_unlock(&freezer_mutex);
+}
+
+static void freezer_css_free(struct cgroup_subsys_state *css)
+{
+	kfree(css_freezer(css));
+}
+
+/*
+ * Tasks can be migrated into a different freezer anytime regardless of its
+ * current state.  freezer_attach() is responsible for making new tasks
+ * conform to the current state.
+ *
+ * Freezer state changes and task migration are synchronized via
+ * @freezer->lock.  freezer_attach() makes the new tasks conform to the
+ * current state and all following state changes can see the new tasks.
+ */
+static void freezer_attach(struct cgroup_taskset *tset)
+{
+	struct task_struct *task;
+	struct cgroup_subsys_state *new_css;
+
+	mutex_lock(&freezer_mutex);
+
+	/*
+	 * Make the new tasks conform to the current state of @new_css.
+	 * For simplicity, when migrating any task to a FROZEN cgroup, we
+	 * revert it to FREEZING and let update_if_frozen() determine the
+	 * correct state later.
+	 *
+	 * Tasks in @tset are on @new_css but may not conform to its
+	 * current state before executing the following - !frozen tasks may
+	 * be visible in a FROZEN cgroup and frozen tasks in a THAWED one.
+	 */
+	cgroup_taskset_for_each(task, new_css, tset) {
+		struct freezer *freezer = css_freezer(new_css);
+
+		if (!(freezer->state & CGROUP_FREEZING)) {
+			__thaw_task(task);
+		} else {
+			freeze_task(task);
+			/* clear FROZEN and propagate upwards */
+			while (freezer && (freezer->state & CGROUP_FROZEN)) {
+				freezer->state &= ~CGROUP_FROZEN;
+				freezer = parent_freezer(freezer);
+			}
+		}
+	}
+
+	mutex_unlock(&freezer_mutex);
+}
+
+/**
+ * freezer_fork - cgroup post fork callback
+ * @task: a task which has just been forked
+ *
+ * @task has just been created and should conform to the current state of
+ * the cgroup_freezer it belongs to.  This function may race against
+ * freezer_attach().  Losing to freezer_attach() means that we don't have
+ * to do anything as freezer_attach() will put @task into the appropriate
+ * state.
+ */
+static void freezer_fork(struct task_struct *task)
+{
+	struct freezer *freezer;
+
+	/*
+	 * The root cgroup is non-freezable, so we can skip locking the
+	 * freezer.  This is safe regardless of race with task migration.
+	 * If we didn't race or won, skipping is obviously the right thing
+	 * to do.  If we lost and root is the new cgroup, noop is still the
+	 * right thing to do.
+	 */
+	if (task_css_is_root(task, freezer_cgrp_id))
+		return;
+
+	mutex_lock(&freezer_mutex);
+	rcu_read_lock();
+
+	freezer = task_freezer(task);
+	if (freezer->state & CGROUP_FREEZING)
+		freeze_task(task);
+
+	rcu_read_unlock();
+	mutex_unlock(&freezer_mutex);
+}
+
+/**
+ * update_if_frozen - update whether a cgroup finished freezing
+ * @css: css of interest
+ *
+ * Once FREEZING is initiated, transition to FROZEN is lazily updated by
+ * calling this function.  If the current state is FREEZING but not FROZEN,
+ * this function checks whether all tasks of this cgroup and the descendant
+ * cgroups finished freezing and, if so, sets FROZEN.
+ *
+ * The caller is responsible for grabbing RCU read lock and calling
+ * update_if_frozen() on all descendants prior to invoking this function.
+ *
+ * Task states and freezer state might disagree while tasks are being
+ * migrated into or out of @css, so we can't verify task states against
+ * @freezer state here.  See freezer_attach() for details.
+ */
+static void update_if_frozen(struct cgroup_subsys_state *css)
+{
+	struct freezer *freezer = css_freezer(css);
+	struct cgroup_subsys_state *pos;
+	struct css_task_iter it;
+	struct task_struct *task;
+
+	lockdep_assert_held(&freezer_mutex);
+
+	if (!(freezer->state & CGROUP_FREEZING) ||
+	    (freezer->state & CGROUP_FROZEN))
+		return;
+
+	/* are all (live) children frozen? */
+	rcu_read_lock();
+	css_for_each_child(pos, css) {
+		struct freezer *child = css_freezer(pos);
+
+		if ((child->state & CGROUP_FREEZER_ONLINE) &&
+		    !(child->state & CGROUP_FROZEN)) {
+			rcu_read_unlock();
+			return;
+		}
+	}
+	rcu_read_unlock();
+
+	/* are all tasks frozen? */
+	css_task_iter_start(css, 0, &it);
+
+	while ((task = css_task_iter_next(&it))) {
+		if (freezing(task)) {
+			/*
+			 * freezer_should_skip() indicates that the task
+			 * should be skipped when determining freezing
+			 * completion.  Consider it frozen in addition to
+			 * the usual frozen condition.
+			 */
+			if (!frozen(task) && !freezer_should_skip(task))
+				goto out_iter_end;
+		}
+	}
+
+	freezer->state |= CGROUP_FROZEN;
+out_iter_end:
+	css_task_iter_end(&it);
+}
+
+static int freezer_read(struct seq_file *m, void *v)
+{
+	struct cgroup_subsys_state *css = seq_css(m), *pos;
+
+	mutex_lock(&freezer_mutex);
+	rcu_read_lock();
+
+	/* update states bottom-up */
+	css_for_each_descendant_post(pos, css) {
+		if (!css_tryget_online(pos))
+			continue;
+		rcu_read_unlock();
+
+		update_if_frozen(pos);
+
+		rcu_read_lock();
+		css_put(pos);
+	}
+
+	rcu_read_unlock();
+	mutex_unlock(&freezer_mutex);
+
+	seq_puts(m, freezer_state_strs(css_freezer(css)->state));
+	seq_putc(m, '\n');
+	return 0;
+}
+
+static void freeze_cgroup(struct freezer *freezer)
+{
+	struct css_task_iter it;
+	struct task_struct *task;
+
+	css_task_iter_start(&freezer->css, 0, &it);
+	while ((task = css_task_iter_next(&it)))
+		freeze_task(task);
+	css_task_iter_end(&it);
+}
+
+static void unfreeze_cgroup(struct freezer *freezer)
+{
+	struct css_task_iter it;
+	struct task_struct *task;
+
+	css_task_iter_start(&freezer->css, 0, &it);
+	while ((task = css_task_iter_next(&it)))
+		__thaw_task(task);
+	css_task_iter_end(&it);
+}
+
+/**
+ * freezer_apply_state - apply state change to a single cgroup_freezer
+ * @freezer: freezer to apply state change to
+ * @freeze: whether to freeze or unfreeze
+ * @state: CGROUP_FREEZING_* flag to set or clear
+ *
+ * Set or clear @state on @cgroup according to @freeze, and perform
+ * freezing or thawing as necessary.
+ */
+static void freezer_apply_state(struct freezer *freezer, bool freeze,
+				unsigned int state)
+{
+	/* also synchronizes against task migration, see freezer_attach() */
+	lockdep_assert_held(&freezer_mutex);
+
+	if (!(freezer->state & CGROUP_FREEZER_ONLINE))
+		return;
+
+	if (freeze) {
+		if (!(freezer->state & CGROUP_FREEZING))
+			atomic_inc(&system_freezing_cnt);
+		freezer->state |= state;
+		freeze_cgroup(freezer);
+	} else {
+		bool was_freezing = freezer->state & CGROUP_FREEZING;
+
+		freezer->state &= ~state;
+
+		if (!(freezer->state & CGROUP_FREEZING)) {
+			if (was_freezing)
+				atomic_dec(&system_freezing_cnt);
+			freezer->state &= ~CGROUP_FROZEN;
+			unfreeze_cgroup(freezer);
+		}
+	}
+}
+
+/**
+ * freezer_change_state - change the freezing state of a cgroup_freezer
+ * @freezer: freezer of interest
+ * @freeze: whether to freeze or thaw
+ *
+ * Freeze or thaw @freezer according to @freeze.  The operations are
+ * recursive - all descendants of @freezer will be affected.
+ */
+static void freezer_change_state(struct freezer *freezer, bool freeze)
+{
+	struct cgroup_subsys_state *pos;
+
+	/*
+	 * Update all its descendants in pre-order traversal.  Each
+	 * descendant will try to inherit its parent's FREEZING state as
+	 * CGROUP_FREEZING_PARENT.
+	 */
+	mutex_lock(&freezer_mutex);
+	rcu_read_lock();
+	css_for_each_descendant_pre(pos, &freezer->css) {
+		struct freezer *pos_f = css_freezer(pos);
+		struct freezer *parent = parent_freezer(pos_f);
+
+		if (!css_tryget_online(pos))
+			continue;
+		rcu_read_unlock();
+
+		if (pos_f == freezer)
+			freezer_apply_state(pos_f, freeze,
+					    CGROUP_FREEZING_SELF);
+		else
+			freezer_apply_state(pos_f,
+					    parent->state & CGROUP_FREEZING,
+					    CGROUP_FREEZING_PARENT);
+
+		rcu_read_lock();
+		css_put(pos);
+	}
+	rcu_read_unlock();
+	mutex_unlock(&freezer_mutex);
+}
+
+static ssize_t freezer_write(struct kernfs_open_file *of,
+			     char *buf, size_t nbytes, loff_t off)
+{
+	bool freeze;
+
+	buf = strstrip(buf);
+
+	if (strcmp(buf, freezer_state_strs(0)) == 0)
+		freeze = false;
+	else if (strcmp(buf, freezer_state_strs(CGROUP_FROZEN)) == 0)
+		freeze = true;
+	else
+		return -EINVAL;
+
+	freezer_change_state(css_freezer(of_css(of)), freeze);
+	return nbytes;
+}
+
+static u64 freezer_self_freezing_read(struct cgroup_subsys_state *css,
+				      struct cftype *cft)
+{
+	struct freezer *freezer = css_freezer(css);
+
+	return (bool)(freezer->state & CGROUP_FREEZING_SELF);
+}
+
+static u64 freezer_parent_freezing_read(struct cgroup_subsys_state *css,
+					struct cftype *cft)
+{
+	struct freezer *freezer = css_freezer(css);
+
+	return (bool)(freezer->state & CGROUP_FREEZING_PARENT);
+}
+
+static struct cftype files[] = {
+	{
+		.name = "state",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = freezer_read,
+		.write = freezer_write,
+	},
+	{
+		.name = "self_freezing",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.read_u64 = freezer_self_freezing_read,
+	},
+	{
+		.name = "parent_freezing",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.read_u64 = freezer_parent_freezing_read,
+	},
+	{ }	/* terminate */
+};
+
+struct cgroup_subsys freezer_cgrp_subsys = {
+	.css_alloc	= freezer_css_alloc,
+	.css_online	= freezer_css_online,
+	.css_offline	= freezer_css_offline,
+	.css_free	= freezer_css_free,
+	.attach		= freezer_attach,
+	.fork		= freezer_fork,
+	.legacy_cftypes	= files,
+};
diff --git a/kernel/fork.c b/kernel/fork.c
index 8b03d93ba068..5359facf9867 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1225,7 +1225,9 @@ static int wait_for_vfork_done(struct task_struct *child,
 	int killed;
 
 	freezer_do_not_count();
+	cgroup_enter_frozen();
 	killed = wait_for_completion_killable(vfork);
+	cgroup_leave_frozen(false);
 	freezer_count();
 
 	if (killed) {
diff --git a/kernel/signal.c b/kernel/signal.c
index cd83cc376767..62f9aea4a15a 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -43,6 +43,7 @@
 #include <linux/compiler.h>
 #include <linux/posix-timers.h>
 #include <linux/livepatch.h>
+#include <linux/cgroup.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/signal.h>
@@ -146,9 +147,10 @@ static inline bool has_pending_signals(sigset_t *signal, sigset_t *blocked)
 
 static bool recalc_sigpending_tsk(struct task_struct *t)
 {
-	if ((t->jobctl & JOBCTL_PENDING_MASK) ||
+	if ((t->jobctl & (JOBCTL_PENDING_MASK | JOBCTL_TRAP_FREEZE)) ||
 	    PENDING(&t->pending, &t->blocked) ||
-	    PENDING(&t->signal->shared_pending, &t->blocked)) {
+	    PENDING(&t->signal->shared_pending, &t->blocked) ||
+	    cgroup_task_frozen(t)) {
 		set_tsk_thread_flag(t, TIF_SIGPENDING);
 		return true;
 	}
@@ -2108,6 +2110,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t
 		preempt_disable();
 		read_unlock(&tasklist_lock);
 		preempt_enable_no_resched();
+		cgroup_enter_frozen();
 		freezable_schedule();
 	} else {
 		/*
@@ -2286,6 +2289,7 @@ static bool do_signal_stop(int signr)
 		}
 
 		/* Now we don't run again until woken by SIGCONT or SIGKILL */
+		cgroup_enter_frozen();
 		freezable_schedule();
 		return true;
 	} else {
@@ -2332,6 +2336,43 @@ static void do_jobctl_trap(void)
 	}
 }
 
+/**
+ * do_freezer_trap - handle the freezer jobctl trap
+ *
+ * Puts the task into frozen state, if only the task is not about to quit.
+ * In this case it drops JOBCTL_TRAP_FREEZE.
+ *
+ * CONTEXT:
+ * Must be called with @current->sighand->siglock held,
+ * which is always released before returning.
+ */
+static void do_freezer_trap(void)
+	__releases(&current->sighand->siglock)
+{
+	/*
+	 * If there are other trap bits pending except JOBCTL_TRAP_FREEZE,
+	 * let's make another loop to give it a chance to be handled.
+	 * In any case, we'll return back.
+	 */
+	if ((current->jobctl & (JOBCTL_PENDING_MASK | JOBCTL_TRAP_FREEZE)) !=
+	     JOBCTL_TRAP_FREEZE) {
+		spin_unlock_irq(&current->sighand->siglock);
+		return;
+	}
+
+	/*
+	 * Now we're sure that there is no pending fatal signal and no
+	 * pending traps. Clear TIF_SIGPENDING to not get out of schedule()
+	 * immediately (if there is a non-fatal signal pending), and
+	 * put the task into sleep.
+	 */
+	__set_current_state(TASK_INTERRUPTIBLE);
+	clear_thread_flag(TIF_SIGPENDING);
+	spin_unlock_irq(&current->sighand->siglock);
+	cgroup_enter_frozen();
+	freezable_schedule();
+}
+
 static int ptrace_signal(int signr, kernel_siginfo_t *info)
 {
 	/*
@@ -2452,9 +2493,24 @@ relock:
 		    do_signal_stop(0))
 			goto relock;
 
-		if (unlikely(current->jobctl & JOBCTL_TRAP_MASK)) {
-			do_jobctl_trap();
+		if (unlikely(current->jobctl &
+			     (JOBCTL_TRAP_MASK | JOBCTL_TRAP_FREEZE))) {
+			if (current->jobctl & JOBCTL_TRAP_MASK) {
+				do_jobctl_trap();
+				spin_unlock_irq(&sighand->siglock);
+			} else if (current->jobctl & JOBCTL_TRAP_FREEZE)
+				do_freezer_trap();
+
+			goto relock;
+		}
+
+		/*
+		 * If the task is leaving the frozen state, let's update
+		 * cgroup counters and reset the frozen bit.
+		 */
+		if (unlikely(cgroup_task_frozen(current))) {
 			spin_unlock_irq(&sighand->siglock);
+			cgroup_leave_frozen(false);
 			goto relock;
 		}
 
@@ -2550,6 +2606,8 @@ relock:
 
 	fatal:
 		spin_unlock_irq(&sighand->siglock);
+		if (unlikely(cgroup_task_frozen(current)))
+			cgroup_leave_frozen(true);
 
 		/*
 		 * Anything else is fatal, maybe with a core dump.
diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore
index adacda50a4b2..7f9835624793 100644
--- a/tools/testing/selftests/cgroup/.gitignore
+++ b/tools/testing/selftests/cgroup/.gitignore
@@ -1,2 +1,3 @@
 test_memcontrol
 test_core
+test_freezer
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
index 23fbaa4a9630..8d369b6a2069 100644
--- a/tools/testing/selftests/cgroup/Makefile
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -5,8 +5,10 @@ all:
 
 TEST_GEN_PROGS = test_memcontrol
 TEST_GEN_PROGS += test_core
+TEST_GEN_PROGS += test_freezer
 
 include ../lib.mk
 
 $(OUTPUT)/test_memcontrol: cgroup_util.c
 $(OUTPUT)/test_core: cgroup_util.c
+$(OUTPUT)/test_freezer: cgroup_util.c
diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
index 14c9fe284806..4c223266299a 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/cgroup_util.c
@@ -74,6 +74,16 @@ char *cg_name_indexed(const char *root, const char *name, int index)
 	return ret;
 }
 
+char *cg_control(const char *cgroup, const char *control)
+{
+	size_t len = strlen(cgroup) + strlen(control) + 2;
+	char *ret = malloc(len);
+
+	snprintf(ret, len, "%s/%s", cgroup, control);
+
+	return ret;
+}
+
 int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
 {
 	char path[PATH_MAX];
@@ -196,7 +206,32 @@ int cg_create(const char *cgroup)
 	return mkdir(cgroup, 0644);
 }
 
-static int cg_killall(const char *cgroup)
+int cg_wait_for_proc_count(const char *cgroup, int count)
+{
+	char buf[10 * PAGE_SIZE] = {0};
+	int attempts;
+	char *ptr;
+
+	for (attempts = 10; attempts >= 0; attempts--) {
+		int nr = 0;
+
+		if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
+			break;
+
+		for (ptr = buf; *ptr; ptr++)
+			if (*ptr == '\n')
+				nr++;
+
+		if (nr >= count)
+			return 0;
+
+		usleep(100000);
+	}
+
+	return -1;
+}
+
+int cg_killall(const char *cgroup)
 {
 	char buf[PAGE_SIZE];
 	char *ptr = buf;
@@ -227,9 +262,7 @@ int cg_destroy(const char *cgroup)
 retry:
 	ret = rmdir(cgroup);
 	if (ret && errno == EBUSY) {
-		ret = cg_killall(cgroup);
-		if (ret)
-			return ret;
+		cg_killall(cgroup);
 		usleep(100);
 		goto retry;
 	}
@@ -240,6 +273,14 @@ retry:
 	return ret;
 }
 
+int cg_enter(const char *cgroup, int pid)
+{
+	char pidbuf[64];
+
+	snprintf(pidbuf, sizeof(pidbuf), "%d", pid);
+	return cg_write(cgroup, "cgroup.procs", pidbuf);
+}
+
 int cg_enter_current(const char *cgroup)
 {
 	char pidbuf[64];
@@ -369,3 +410,12 @@ int set_oom_adj_score(int pid, int score)
 	close(fd);
 	return 0;
 }
+
+char proc_read_text(int pid, const char *item, char *buf, size_t size)
+{
+	char path[PATH_MAX];
+
+	snprintf(path, sizeof(path), "/proc/%d/%s", pid, item);
+
+	return read_text(path, buf, size);
+}
diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h
index 9ac8b7958f83..c72f28046bfa 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.h
+++ b/tools/testing/selftests/cgroup/cgroup_util.h
@@ -18,6 +18,7 @@ static inline int values_close(long a, long b, int err)
 extern int cg_find_unified_root(char *root, size_t len);
 extern char *cg_name(const char *root, const char *name);
 extern char *cg_name_indexed(const char *root, const char *name, int index);
+extern char *cg_control(const char *cgroup, const char *control);
 extern int cg_create(const char *cgroup);
 extern int cg_destroy(const char *cgroup);
 extern int cg_read(const char *cgroup, const char *control,
@@ -32,6 +33,7 @@ extern int cg_write(const char *cgroup, const char *control, char *buf);
 extern int cg_run(const char *cgroup,
 		  int (*fn)(const char *cgroup, void *arg),
 		  void *arg);
+extern int cg_enter(const char *cgroup, int pid);
 extern int cg_enter_current(const char *cgroup);
 extern int cg_run_nowait(const char *cgroup,
 			 int (*fn)(const char *cgroup, void *arg),
@@ -41,3 +43,6 @@ extern int alloc_pagecache(int fd, size_t size);
 extern int alloc_anon(const char *cgroup, void *arg);
 extern int is_swap_enabled(void);
 extern int set_oom_adj_score(int pid, int score);
+extern int cg_wait_for_proc_count(const char *cgroup, int count);
+extern int cg_killall(const char *cgroup);
+extern char proc_read_text(int pid, const char *item, char *buf, size_t size);
diff --git a/tools/testing/selftests/cgroup/test_freezer.c b/tools/testing/selftests/cgroup/test_freezer.c
new file mode 100644
index 000000000000..2bfddb6d6d3b
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_freezer.c
@@ -0,0 +1,851 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <stdbool.h>
+#include <linux/limits.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+#include <poll.h>
+#include <stdlib.h>
+#include <sys/inotify.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "../kselftest.h"
+#include "cgroup_util.h"
+
+#define DEBUG
+#ifdef DEBUG
+#define debug(args...) fprintf(stderr, args)
+#else
+#define debug(args...)
+#endif
+
+/*
+ * Check if the cgroup is frozen by looking at the cgroup.events::frozen value.
+ */
+static int cg_check_frozen(const char *cgroup, bool frozen)
+{
+	if (frozen) {
+		if (cg_read_strstr(cgroup, "cgroup.events", "frozen 1") != 0) {
+			debug("Cgroup %s isn't frozen\n", cgroup);
+			return -1;
+		}
+	} else {
+		/*
+		 * Check the cgroup.events::frozen value.
+		 */
+		if (cg_read_strstr(cgroup, "cgroup.events", "frozen 0") != 0) {
+			debug("Cgroup %s is frozen\n", cgroup);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Freeze the given cgroup.
+ */
+static int cg_freeze_nowait(const char *cgroup, bool freeze)
+{
+	return cg_write(cgroup, "cgroup.freeze", freeze ? "1" : "0");
+}
+
+/*
+ * Prepare for waiting on cgroup.events file.
+ */
+static int cg_prepare_for_wait(const char *cgroup)
+{
+	int fd, ret = -1;
+
+	fd = inotify_init1(0);
+	if (fd == -1) {
+		debug("Error: inotify_init1() failed\n");
+		return fd;
+	}
+
+	ret = inotify_add_watch(fd, cg_control(cgroup, "cgroup.events"),
+				IN_MODIFY);
+	if (ret == -1) {
+		debug("Error: inotify_add_watch() failed\n");
+		close(fd);
+	}
+
+	return fd;
+}
+
+/*
+ * Wait for an event. If there are no events for 10 seconds,
+ * treat this an error.
+ */
+static int cg_wait_for(int fd)
+{
+	int ret = -1;
+	struct pollfd fds = {
+		.fd = fd,
+		.events = POLLIN,
+	};
+
+	while (true) {
+		ret = poll(&fds, 1, 10000);
+
+		if (ret == -1) {
+			if (errno == EINTR)
+				continue;
+			debug("Error: poll() failed\n");
+			break;
+		}
+
+		if (ret > 0 && fds.revents & POLLIN) {
+			ret = 0;
+			break;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Attach a task to the given cgroup and wait for a cgroup frozen event.
+ * All transient events (e.g. populated) are ignored.
+ */
+static int cg_enter_and_wait_for_frozen(const char *cgroup, int pid,
+					bool frozen)
+{
+	int fd, ret = -1;
+	int attempts;
+
+	fd = cg_prepare_for_wait(cgroup);
+	if (fd < 0)
+		return fd;
+
+	ret = cg_enter(cgroup, pid);
+	if (ret)
+		goto out;
+
+	for (attempts = 0; attempts < 10; attempts++) {
+		ret = cg_wait_for(fd);
+		if (ret)
+			break;
+
+		ret = cg_check_frozen(cgroup, frozen);
+		if (ret)
+			continue;
+	}
+
+out:
+	close(fd);
+	return ret;
+}
+
+/*
+ * Freeze the given cgroup and wait for the inotify signal.
+ * If there are no events in 10 seconds, treat this as an error.
+ * Then check that the cgroup is in the desired state.
+ */
+static int cg_freeze_wait(const char *cgroup, bool freeze)
+{
+	int fd, ret = -1;
+
+	fd = cg_prepare_for_wait(cgroup);
+	if (fd < 0)
+		return fd;
+
+	ret = cg_freeze_nowait(cgroup, freeze);
+	if (ret) {
+		debug("Error: cg_freeze_nowait() failed\n");
+		goto out;
+	}
+
+	ret = cg_wait_for(fd);
+	if (ret)
+		goto out;
+
+	ret = cg_check_frozen(cgroup, freeze);
+out:
+	close(fd);
+	return ret;
+}
+
+/*
+ * A simple process running in a sleep loop until being
+ * re-parented.
+ */
+static int child_fn(const char *cgroup, void *arg)
+{
+	int ppid = getppid();
+
+	while (getppid() == ppid)
+		usleep(1000);
+
+	return getppid() == ppid;
+}
+
+/*
+ * A simple test for the cgroup freezer: populated the cgroup with 100
+ * running processes and freeze it. Then unfreeze it. Then it kills all
+ * processes and destroys the cgroup.
+ */
+static int test_cgfreezer_simple(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *cgroup = NULL;
+	int i;
+
+	cgroup = cg_name(root, "cg_test_simple");
+	if (!cgroup)
+		goto cleanup;
+
+	if (cg_create(cgroup))
+		goto cleanup;
+
+	for (i = 0; i < 100; i++)
+		cg_run_nowait(cgroup, child_fn, NULL);
+
+	if (cg_wait_for_proc_count(cgroup, 100))
+		goto cleanup;
+
+	if (cg_check_frozen(cgroup, false))
+		goto cleanup;
+
+	if (cg_freeze_wait(cgroup, true))
+		goto cleanup;
+
+	if (cg_freeze_wait(cgroup, false))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	if (cgroup)
+		cg_destroy(cgroup);
+	free(cgroup);
+	return ret;
+}
+
+/*
+ * The test creates the following hierarchy:
+ *       A
+ *    / / \ \
+ *   B  E  I K
+ *  /\  |
+ * C  D F
+ *      |
+ *      G
+ *      |
+ *      H
+ *
+ * with a process in C, H and 3 processes in K.
+ * Then it tries to freeze and unfreeze the whole tree.
+ */
+static int test_cgfreezer_tree(const char *root)
+{
+	char *cgroup[10] = {0};
+	int ret = KSFT_FAIL;
+	int i;
+
+	cgroup[0] = cg_name(root, "cg_test_tree_A");
+	if (!cgroup[0])
+		goto cleanup;
+
+	cgroup[1] = cg_name(cgroup[0], "B");
+	if (!cgroup[1])
+		goto cleanup;
+
+	cgroup[2] = cg_name(cgroup[1], "C");
+	if (!cgroup[2])
+		goto cleanup;
+
+	cgroup[3] = cg_name(cgroup[1], "D");
+	if (!cgroup[3])
+		goto cleanup;
+
+	cgroup[4] = cg_name(cgroup[0], "E");
+	if (!cgroup[4])
+		goto cleanup;
+
+	cgroup[5] = cg_name(cgroup[4], "F");
+	if (!cgroup[5])
+		goto cleanup;
+
+	cgroup[6] = cg_name(cgroup[5], "G");
+	if (!cgroup[6])
+		goto cleanup;
+
+	cgroup[7] = cg_name(cgroup[6], "H");
+	if (!cgroup[7])
+		goto cleanup;
+
+	cgroup[8] = cg_name(cgroup[0], "I");
+	if (!cgroup[8])
+		goto cleanup;
+
+	cgroup[9] = cg_name(cgroup[0], "K");
+	if (!cgroup[9])
+		goto cleanup;
+
+	for (i = 0; i < 10; i++)
+		if (cg_create(cgroup[i]))
+			goto cleanup;
+
+	cg_run_nowait(cgroup[2], child_fn, NULL);
+	cg_run_nowait(cgroup[7], child_fn, NULL);
+	cg_run_nowait(cgroup[9], child_fn, NULL);
+	cg_run_nowait(cgroup[9], child_fn, NULL);
+	cg_run_nowait(cgroup[9], child_fn, NULL);
+
+	/*
+	 * Wait until all child processes will enter
+	 * corresponding cgroups.
+	 */
+
+	if (cg_wait_for_proc_count(cgroup[2], 1) ||
+	    cg_wait_for_proc_count(cgroup[7], 1) ||
+	    cg_wait_for_proc_count(cgroup[9], 3))
+		goto cleanup;
+
+	/*
+	 * Freeze B.
+	 */
+	if (cg_freeze_wait(cgroup[1], true))
+		goto cleanup;
+
+	/*
+	 * Freeze F.
+	 */
+	if (cg_freeze_wait(cgroup[5], true))
+		goto cleanup;
+
+	/*
+	 * Freeze G.
+	 */
+	if (cg_freeze_wait(cgroup[6], true))
+		goto cleanup;
+
+	/*
+	 * Check that A and E are not frozen.
+	 */
+	if (cg_check_frozen(cgroup[0], false))
+		goto cleanup;
+
+	if (cg_check_frozen(cgroup[4], false))
+		goto cleanup;
+
+	/*
+	 * Freeze A. Check that A, B and E are frozen.
+	 */
+	if (cg_freeze_wait(cgroup[0], true))
+		goto cleanup;
+
+	if (cg_check_frozen(cgroup[1], true))
+		goto cleanup;
+
+	if (cg_check_frozen(cgroup[4], true))
+		goto cleanup;
+
+	/*
+	 * Unfreeze B, F and G
+	 */
+	if (cg_freeze_nowait(cgroup[1], false))
+		goto cleanup;
+
+	if (cg_freeze_nowait(cgroup[5], false))
+		goto cleanup;
+
+	if (cg_freeze_nowait(cgroup[6], false))
+		goto cleanup;
+
+	/*
+	 * Check that C and H are still frozen.
+	 */
+	if (cg_check_frozen(cgroup[2], true))
+		goto cleanup;
+
+	if (cg_check_frozen(cgroup[7], true))
+		goto cleanup;
+
+	/*
+	 * Unfreeze A. Check that A, C and K are not frozen.
+	 */
+	if (cg_freeze_wait(cgroup[0], false))
+		goto cleanup;
+
+	if (cg_check_frozen(cgroup[2], false))
+		goto cleanup;
+
+	if (cg_check_frozen(cgroup[9], false))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	for (i = 9; i >= 0 && cgroup[i]; i--) {
+		cg_destroy(cgroup[i]);
+		free(cgroup[i]);
+	}
+
+	return ret;
+}
+
+/*
+ * A fork bomb emulator.
+ */
+static int forkbomb_fn(const char *cgroup, void *arg)
+{
+	int ppid;
+
+	fork();
+	fork();
+
+	ppid = getppid();
+
+	while (getppid() == ppid)
+		usleep(1000);
+
+	return getppid() == ppid;
+}
+
+/*
+ * The test runs a fork bomb in a cgroup and tries to freeze it.
+ * Then it kills all processes and checks that cgroup isn't populated
+ * anymore.
+ */
+static int test_cgfreezer_forkbomb(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *cgroup = NULL;
+
+	cgroup = cg_name(root, "cg_forkbomb_test");
+	if (!cgroup)
+		goto cleanup;
+
+	if (cg_create(cgroup))
+		goto cleanup;
+
+	cg_run_nowait(cgroup, forkbomb_fn, NULL);
+
+	usleep(100000);
+
+	if (cg_freeze_wait(cgroup, true))
+		goto cleanup;
+
+	if (cg_killall(cgroup))
+		goto cleanup;
+
+	if (cg_wait_for_proc_count(cgroup, 0))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	if (cgroup)
+		cg_destroy(cgroup);
+	free(cgroup);
+	return ret;
+}
+
+/*
+ * The test creates two nested cgroups, freezes the parent
+ * and removes the child. Then it checks that the parent cgroup
+ * remains frozen and it's possible to create a new child
+ * without unfreezing. The new child is frozen too.
+ */
+static int test_cgfreezer_rmdir(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *parent, *child = NULL;
+
+	parent = cg_name(root, "cg_test_rmdir_A");
+	if (!parent)
+		goto cleanup;
+
+	child = cg_name(parent, "cg_test_rmdir_B");
+	if (!child)
+		goto cleanup;
+
+	if (cg_create(parent))
+		goto cleanup;
+
+	if (cg_create(child))
+		goto cleanup;
+
+	if (cg_freeze_wait(parent, true))
+		goto cleanup;
+
+	if (cg_destroy(child))
+		goto cleanup;
+
+	if (cg_check_frozen(parent, true))
+		goto cleanup;
+
+	if (cg_create(child))
+		goto cleanup;
+
+	if (cg_check_frozen(child, true))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	if (child)
+		cg_destroy(child);
+	free(child);
+	if (parent)
+		cg_destroy(parent);
+	free(parent);
+	return ret;
+}
+
+/*
+ * The test creates two cgroups: A and B, runs a process in A
+ * and performs several migrations:
+ * 1) A (running) -> B (frozen)
+ * 2) B (frozen) -> A (running)
+ * 3) A (frozen) -> B (frozen)
+ *
+ * On each step it checks the actual state of both cgroups.
+ */
+static int test_cgfreezer_migrate(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *cgroup[2] = {0};
+	int pid;
+
+	cgroup[0] = cg_name(root, "cg_test_migrate_A");
+	if (!cgroup[0])
+		goto cleanup;
+
+	cgroup[1] = cg_name(root, "cg_test_migrate_B");
+	if (!cgroup[1])
+		goto cleanup;
+
+	if (cg_create(cgroup[0]))
+		goto cleanup;
+
+	if (cg_create(cgroup[1]))
+		goto cleanup;
+
+	pid = cg_run_nowait(cgroup[0], child_fn, NULL);
+	if (pid < 0)
+		goto cleanup;
+
+	if (cg_wait_for_proc_count(cgroup[0], 1))
+		goto cleanup;
+
+	/*
+	 * Migrate from A (running) to B (frozen)
+	 */
+	if (cg_freeze_wait(cgroup[1], true))
+		goto cleanup;
+
+	if (cg_enter_and_wait_for_frozen(cgroup[1], pid, true))
+		goto cleanup;
+
+	if (cg_check_frozen(cgroup[0], false))
+		goto cleanup;
+
+	/*
+	 * Migrate from B (frozen) to A (running)
+	 */
+	if (cg_enter_and_wait_for_frozen(cgroup[0], pid, false))
+		goto cleanup;
+
+	if (cg_check_frozen(cgroup[1], true))
+		goto cleanup;
+
+	/*
+	 * Migrate from A (frozen) to B (frozen)
+	 */
+	if (cg_freeze_wait(cgroup[0], true))
+		goto cleanup;
+
+	if (cg_enter_and_wait_for_frozen(cgroup[1], pid, true))
+		goto cleanup;
+
+	if (cg_check_frozen(cgroup[0], true))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	if (cgroup[0])
+		cg_destroy(cgroup[0]);
+	free(cgroup[0]);
+	if (cgroup[1])
+		cg_destroy(cgroup[1]);
+	free(cgroup[1]);
+	return ret;
+}
+
+/*
+ * The test checks that ptrace works with a tracing process in a frozen cgroup.
+ */
+static int test_cgfreezer_ptrace(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *cgroup = NULL;
+	siginfo_t siginfo;
+	int pid;
+
+	cgroup = cg_name(root, "cg_test_ptrace");
+	if (!cgroup)
+		goto cleanup;
+
+	if (cg_create(cgroup))
+		goto cleanup;
+
+	pid = cg_run_nowait(cgroup, child_fn, NULL);
+	if (pid < 0)
+		goto cleanup;
+
+	if (cg_wait_for_proc_count(cgroup, 1))
+		goto cleanup;
+
+	if (cg_freeze_wait(cgroup, true))
+		goto cleanup;
+
+	if (ptrace(PTRACE_SEIZE, pid, NULL, NULL))
+		goto cleanup;
+
+	if (ptrace(PTRACE_INTERRUPT, pid, NULL, NULL))
+		goto cleanup;
+
+	waitpid(pid, NULL, 0);
+
+	/*
+	 * Cgroup has to remain frozen, however the test task
+	 * is in traced state.
+	 */
+	if (cg_check_frozen(cgroup, true))
+		goto cleanup;
+
+	if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &siginfo))
+		goto cleanup;
+
+	if (ptrace(PTRACE_DETACH, pid, NULL, NULL))
+		goto cleanup;
+
+	if (cg_check_frozen(cgroup, true))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	if (cgroup)
+		cg_destroy(cgroup);
+	free(cgroup);
+	return ret;
+}
+
+/*
+ * Check if the process is stopped.
+ */
+static int proc_check_stopped(int pid)
+{
+	char buf[PAGE_SIZE];
+	int len;
+
+	len = proc_read_text(pid, "stat", buf, sizeof(buf));
+	if (len == -1) {
+		debug("Can't get %d stat\n", pid);
+		return -1;
+	}
+
+	if (strstr(buf, "(test_freezer) T ") == NULL) {
+		debug("Process %d in the unexpected state: %s\n", pid, buf);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Test that it's possible to freeze a cgroup with a stopped process.
+ */
+static int test_cgfreezer_stopped(const char *root)
+{
+	int pid, ret = KSFT_FAIL;
+	char *cgroup = NULL;
+
+	cgroup = cg_name(root, "cg_test_stopped");
+	if (!cgroup)
+		goto cleanup;
+
+	if (cg_create(cgroup))
+		goto cleanup;
+
+	pid = cg_run_nowait(cgroup, child_fn, NULL);
+
+	if (cg_wait_for_proc_count(cgroup, 1))
+		goto cleanup;
+
+	if (kill(pid, SIGSTOP))
+		goto cleanup;
+
+	if (cg_check_frozen(cgroup, false))
+		goto cleanup;
+
+	if (cg_freeze_wait(cgroup, true))
+		goto cleanup;
+
+	if (cg_freeze_wait(cgroup, false))
+		goto cleanup;
+
+	if (proc_check_stopped(pid))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	if (cgroup)
+		cg_destroy(cgroup);
+	free(cgroup);
+	return ret;
+}
+
+/*
+ * Test that it's possible to freeze a cgroup with a ptraced process.
+ */
+static int test_cgfreezer_ptraced(const char *root)
+{
+	int pid, ret = KSFT_FAIL;
+	char *cgroup = NULL;
+	siginfo_t siginfo;
+
+	cgroup = cg_name(root, "cg_test_ptraced");
+	if (!cgroup)
+		goto cleanup;
+
+	if (cg_create(cgroup))
+		goto cleanup;
+
+	pid = cg_run_nowait(cgroup, child_fn, NULL);
+
+	if (cg_wait_for_proc_count(cgroup, 1))
+		goto cleanup;
+
+	if (ptrace(PTRACE_SEIZE, pid, NULL, NULL))
+		goto cleanup;
+
+	if (ptrace(PTRACE_INTERRUPT, pid, NULL, NULL))
+		goto cleanup;
+
+	waitpid(pid, NULL, 0);
+
+	if (cg_check_frozen(cgroup, false))
+		goto cleanup;
+
+	if (cg_freeze_wait(cgroup, true))
+		goto cleanup;
+
+	/*
+	 * cg_check_frozen(cgroup, true) will fail here,
+	 * because the task in in the TRACEd state.
+	 */
+	if (cg_freeze_wait(cgroup, false))
+		goto cleanup;
+
+	if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &siginfo))
+		goto cleanup;
+
+	if (ptrace(PTRACE_DETACH, pid, NULL, NULL))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	if (cgroup)
+		cg_destroy(cgroup);
+	free(cgroup);
+	return ret;
+}
+
+static int vfork_fn(const char *cgroup, void *arg)
+{
+	int pid = vfork();
+
+	if (pid == 0)
+		while (true)
+			sleep(1);
+
+	return pid;
+}
+
+/*
+ * Test that it's possible to freeze a cgroup with a process,
+ * which called vfork() and is waiting for a child.
+ */
+static int test_cgfreezer_vfork(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *cgroup = NULL;
+
+	cgroup = cg_name(root, "cg_test_vfork");
+	if (!cgroup)
+		goto cleanup;
+
+	if (cg_create(cgroup))
+		goto cleanup;
+
+	cg_run_nowait(cgroup, vfork_fn, NULL);
+
+	if (cg_wait_for_proc_count(cgroup, 2))
+		goto cleanup;
+
+	if (cg_freeze_wait(cgroup, true))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	if (cgroup)
+		cg_destroy(cgroup);
+	free(cgroup);
+	return ret;
+}
+
+#define T(x) { x, #x }
+struct cgfreezer_test {
+	int (*fn)(const char *root);
+	const char *name;
+} tests[] = {
+	T(test_cgfreezer_simple),
+	T(test_cgfreezer_tree),
+	T(test_cgfreezer_forkbomb),
+	T(test_cgfreezer_rmdir),
+	T(test_cgfreezer_migrate),
+	T(test_cgfreezer_ptrace),
+	T(test_cgfreezer_stopped),
+	T(test_cgfreezer_ptraced),
+	T(test_cgfreezer_vfork),
+};
+#undef T
+
+int main(int argc, char *argv[])
+{
+	char root[PATH_MAX];
+	int i, ret = EXIT_SUCCESS;
+
+	if (cg_find_unified_root(root, sizeof(root)))
+		ksft_exit_skip("cgroup v2 isn't mounted\n");
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
+		switch (tests[i].fn(root)) {
+		case KSFT_PASS:
+			ksft_test_result_pass("%s\n", tests[i].name);
+			break;
+		case KSFT_SKIP:
+			ksft_test_result_skip("%s\n", tests[i].name);
+			break;
+		default:
+			ret = EXIT_FAILURE;
+			ksft_test_result_fail("%s\n", tests[i].name);
+			break;
+		}
+	}
+
+	return ret;
+}
author	Linus Torvalds <torvalds@linux-foundation.org>	2019-05-09 13:52:12 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2019-05-09 13:52:12 -0700
commit	abde77eb5c66b2f98539c4644b54f34b7e179e6b (patch)
tree	cc6f6142641397e895fba9a19d0971f3827d7422
parent	Merge branch 'for-5.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq (diff)
parent	cgroup: never call do_group_exit() with task->frozen bit set (diff)