From 4c737b41de7f4eef2a593803bad1b918dd718b10 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 10 Aug 2016 11:23:44 -0400
Subject: cgroup: make cgroup_path() and friends behave in the style of
 strlcpy()

cgroup_path() and friends used to format the path from the end and
thus the resulting path usually didn't start at the start of the
passed in buffer.  Also, when the buffer was too small, the partial
result was truncated from the head rather than tail and there was no
way to tell how long the full path would be.  These make the functions
less robust and more awkward to use.

With recent updates to kernfs_path(), cgroup_path() and friends can be
made to behave in strlcpy() style.

* cgroup_path(), cgroup_path_ns[_locked]() and task_cgroup_path() now
  always return the length of the full path.  If buffer is too small,
  it contains nul terminated truncated output.

* All users updated accordingly.

v2: cgroup_path() usage in kernel/sched/debug.c converted.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Serge Hallyn <serge.hallyn@ubuntu.com>
Cc: Peter Zijlstra <peterz@infradead.org>
---
 kernel/cgroup.c | 48 ++++++++++++++++++++++--------------------------
 1 file changed, 22 insertions(+), 26 deletions(-)

(limited to 'kernel/cgroup.c')

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index d1c51b7f5221..3861161e460f 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2315,22 +2315,18 @@ static struct file_system_type cgroup2_fs_type = {
 	.fs_flags = FS_USERNS_MOUNT,
 };
 
-static char *cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
-				   struct cgroup_namespace *ns)
+static int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
+				 struct cgroup_namespace *ns)
 {
 	struct cgroup *root = cset_cgroup_from_root(ns->root_cset, cgrp->root);
-	int ret;
 
-	ret = kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen);
-	if (ret < 0 || ret >= buflen)
-		return NULL;
-	return buf;
+	return kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen);
 }
 
-char *cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
-		     struct cgroup_namespace *ns)
+int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
+		   struct cgroup_namespace *ns)
 {
-	char *ret;
+	int ret;
 
 	mutex_lock(&cgroup_mutex);
 	spin_lock_irq(&css_set_lock);
@@ -2357,12 +2353,12 @@ EXPORT_SYMBOL_GPL(cgroup_path_ns);
  *
  * Return value is the same as kernfs_path().
  */
-char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
+int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
 {
 	struct cgroup_root *root;
 	struct cgroup *cgrp;
 	int hierarchy_id = 1;
-	char *path = NULL;
+	int ret;
 
 	mutex_lock(&cgroup_mutex);
 	spin_lock_irq(&css_set_lock);
@@ -2371,16 +2367,15 @@ char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
 
 	if (root) {
 		cgrp = task_cgroup_from_root(task, root);
-		path = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
+		ret = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
 	} else {
 		/* if no hierarchy exists, everyone is in "/" */
-		if (strlcpy(buf, "/", buflen) < buflen)
-			path = buf;
+		ret = strlcpy(buf, "/", buflen);
 	}
 
 	spin_unlock_irq(&css_set_lock);
 	mutex_unlock(&cgroup_mutex);
-	return path;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(task_cgroup_path);
 
@@ -5716,7 +5711,7 @@ core_initcall(cgroup_wq_init);
 int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
 		     struct pid *pid, struct task_struct *tsk)
 {
-	char *buf, *path;
+	char *buf;
 	int retval;
 	struct cgroup_root *root;
 
@@ -5759,18 +5754,18 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
 		 * " (deleted)" is appended to the cgroup path.
 		 */
 		if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) {
-			path = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,
+			retval = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,
 						current->nsproxy->cgroup_ns);
-			if (!path) {
+			if (retval >= PATH_MAX) {
 				retval = -ENAMETOOLONG;
 				goto out_unlock;
 			}
+
+			seq_puts(m, buf);
 		} else {
-			path = "/";
+			seq_puts(m, "/");
 		}
 
-		seq_puts(m, path);
-
 		if (cgroup_on_dfl(cgrp) && cgroup_is_dead(cgrp))
 			seq_puts(m, " (deleted)\n");
 		else
@@ -6035,8 +6030,9 @@ static void cgroup_release_agent(struct work_struct *work)
 {
 	struct cgroup *cgrp =
 		container_of(work, struct cgroup, release_agent_work);
-	char *pathbuf = NULL, *agentbuf = NULL, *path;
+	char *pathbuf = NULL, *agentbuf = NULL;
 	char *argv[3], *envp[3];
+	int ret;
 
 	mutex_lock(&cgroup_mutex);
 
@@ -6046,13 +6042,13 @@ static void cgroup_release_agent(struct work_struct *work)
 		goto out;
 
 	spin_lock_irq(&css_set_lock);
-	path = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);
+	ret = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);
 	spin_unlock_irq(&css_set_lock);
-	if (!path)
+	if (ret >= PATH_MAX)
 		goto out;
 
 	argv[0] = agentbuf;
-	argv[1] = path;
+	argv[1] = pathbuf;
 	argv[2] = NULL;
 
 	/* minimal command environment */
-- 
cgit v1.2.3-59-g8ed1b


From ed1777de25e45bfb58fad63341904f8a77911785 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 10 Aug 2016 11:23:44 -0400
Subject: cgroup: add tracepoints for basic operations

Debugging what goes wrong with cgroup setup can get hairy.  Add
tracepoints for cgroup hierarchy mount, cgroup creation/destruction
and task migration operations for better visibility.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/trace/events/cgroup.h | 163 ++++++++++++++++++++++++++++++++++++++++++
 kernel/cgroup.c               |  25 +++++++
 2 files changed, 188 insertions(+)
 create mode 100644 include/trace/events/cgroup.h

(limited to 'kernel/cgroup.c')

diff --git a/include/trace/events/cgroup.h b/include/trace/events/cgroup.h
new file mode 100644
index 000000000000..ab68640a18d0
--- /dev/null
+++ b/include/trace/events/cgroup.h
@@ -0,0 +1,163 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM cgroup
+
+#if !defined(_TRACE_CGROUP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_CGROUP_H
+
+#include <linux/cgroup.h>
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(cgroup_root,
+
+	TP_PROTO(struct cgroup_root *root),
+
+	TP_ARGS(root),
+
+	TP_STRUCT__entry(
+		__field(	int,		root			)
+		__field(	u16,		ss_mask			)
+		__string(	name,		root->name		)
+	),
+
+	TP_fast_assign(
+		__entry->root = root->hierarchy_id;
+		__entry->ss_mask = root->subsys_mask;
+		__assign_str(name, root->name);
+	),
+
+	TP_printk("root=%d ss_mask=%#x name=%s",
+		  __entry->root, __entry->ss_mask, __get_str(name))
+);
+
+DEFINE_EVENT(cgroup_root, cgroup_setup_root,
+
+	TP_PROTO(struct cgroup_root *root),
+
+	TP_ARGS(root)
+);
+
+DEFINE_EVENT(cgroup_root, cgroup_destroy_root,
+
+	TP_PROTO(struct cgroup_root *root),
+
+	TP_ARGS(root)
+);
+
+DEFINE_EVENT(cgroup_root, cgroup_remount,
+
+	TP_PROTO(struct cgroup_root *root),
+
+	TP_ARGS(root)
+);
+
+DECLARE_EVENT_CLASS(cgroup,
+
+	TP_PROTO(struct cgroup *cgrp),
+
+	TP_ARGS(cgrp),
+
+	TP_STRUCT__entry(
+		__field(	int,		root			)
+		__field(	int,		id			)
+		__field(	int,		level			)
+		__dynamic_array(char,		path,
+				cgrp->kn ? cgroup_path(cgrp, NULL, 0) + 1
+					 : strlen("(null)"))
+	),
+
+	TP_fast_assign(
+		__entry->root = cgrp->root->hierarchy_id;
+		__entry->id = cgrp->id;
+		__entry->level = cgrp->level;
+		if (cgrp->kn)
+			cgroup_path(cgrp, __get_dynamic_array(path),
+				    __get_dynamic_array_len(path));
+		else
+			__assign_str(path, "(null)");
+	),
+
+	TP_printk("root=%d id=%d level=%d path=%s",
+		  __entry->root, __entry->id, __entry->level, __get_str(path))
+);
+
+DEFINE_EVENT(cgroup, cgroup_mkdir,
+
+	TP_PROTO(struct cgroup *cgroup),
+
+	TP_ARGS(cgroup)
+);
+
+DEFINE_EVENT(cgroup, cgroup_rmdir,
+
+	TP_PROTO(struct cgroup *cgroup),
+
+	TP_ARGS(cgroup)
+);
+
+DEFINE_EVENT(cgroup, cgroup_release,
+
+	TP_PROTO(struct cgroup *cgroup),
+
+	TP_ARGS(cgroup)
+);
+
+DEFINE_EVENT(cgroup, cgroup_rename,
+
+	TP_PROTO(struct cgroup *cgroup),
+
+	TP_ARGS(cgroup)
+);
+
+DECLARE_EVENT_CLASS(cgroup_migrate,
+
+	TP_PROTO(struct cgroup *dst_cgrp, struct task_struct *task, bool threadgroup),
+
+	TP_ARGS(dst_cgrp, task, threadgroup),
+
+	TP_STRUCT__entry(
+		__field(	int,		dst_root		)
+		__field(	int,		dst_id			)
+		__field(	int,		dst_level		)
+		__dynamic_array(char,		dst_path,
+				dst_cgrp->kn ? cgroup_path(dst_cgrp, NULL, 0) + 1
+					     : strlen("(null)"))
+		__field(	int,		pid			)
+		__string(	comm,		task->comm		)
+	),
+
+	TP_fast_assign(
+		__entry->dst_root = dst_cgrp->root->hierarchy_id;
+		__entry->dst_id = dst_cgrp->id;
+		__entry->dst_level = dst_cgrp->level;
+		if (dst_cgrp->kn)
+			cgroup_path(dst_cgrp, __get_dynamic_array(dst_path),
+				    __get_dynamic_array_len(dst_path));
+		else
+			__assign_str(dst_path, "(null)");
+		__entry->pid = task->pid;
+		__assign_str(comm, task->comm);
+	),
+
+	TP_printk("dst_root=%d dst_id=%d dst_level=%d dst_path=%s pid=%d comm=%s",
+		  __entry->dst_root, __entry->dst_id, __entry->dst_level,
+		  __get_str(dst_path), __entry->pid, __get_str(comm))
+);
+
+DEFINE_EVENT(cgroup_migrate, cgroup_attach_task,
+
+	TP_PROTO(struct cgroup *dst_cgrp, struct task_struct *task, bool threadgroup),
+
+	TP_ARGS(dst_cgrp, task, threadgroup)
+);
+
+DEFINE_EVENT(cgroup_migrate, cgroup_transfer_tasks,
+
+	TP_PROTO(struct cgroup *dst_cgrp, struct task_struct *task, bool threadgroup),
+
+	TP_ARGS(dst_cgrp, task, threadgroup)
+);
+
+#endif /* _TRACE_CGROUP_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 3861161e460f..5e2e81ad9175 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -64,6 +64,9 @@
 #include <linux/file.h>
 #include <net/sock.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/cgroup.h>
+
 /*
  * pidlists linger the following amount before being destroyed.  The goal
  * is avoiding frequent destruction in the middle of consecutive read calls
@@ -1176,6 +1179,8 @@ static void cgroup_destroy_root(struct cgroup_root *root)
 	struct cgroup *cgrp = &root->cgrp;
 	struct cgrp_cset_link *link, *tmp_link;
 
+	trace_cgroup_destroy_root(root);
+
 	cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
 
 	BUG_ON(atomic_read(&root->nr_cgrps));
@@ -1874,6 +1879,9 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
 		strcpy(root->release_agent_path, opts.release_agent);
 		spin_unlock(&release_agent_path_lock);
 	}
+
+	trace_cgroup_remount(root);
+
  out_unlock:
 	kfree(opts.release_agent);
 	kfree(opts.name);
@@ -2031,6 +2039,8 @@ static int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
 	if (ret)
 		goto destroy_root;
 
+	trace_cgroup_setup_root(root);
+
 	/*
 	 * There must be no failure case after here, since rebinding takes
 	 * care of subsystems' refcounts, which are explicitly dropped in
@@ -2825,6 +2835,10 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp,
 		ret = cgroup_migrate(leader, threadgroup, dst_cgrp->root);
 
 	cgroup_migrate_finish(&preloaded_csets);
+
+	if (!ret)
+		trace_cgroup_attach_task(dst_cgrp, leader, threadgroup);
+
 	return ret;
 }
 
@@ -3587,6 +3601,8 @@ static int cgroup_rename(struct kernfs_node *kn, struct kernfs_node *new_parent,
 	mutex_lock(&cgroup_mutex);
 
 	ret = kernfs_rename(kn, new_parent, new_name_str);
+	if (!ret)
+		trace_cgroup_rename(cgrp);
 
 	mutex_unlock(&cgroup_mutex);
 
@@ -4355,6 +4371,8 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
 
 		if (task) {
 			ret = cgroup_migrate(task, false, to->root);
+			if (!ret)
+				trace_cgroup_transfer_tasks(to, task, false);
 			put_task_struct(task);
 		}
 	} while (task && !ret);
@@ -5020,6 +5038,8 @@ static void css_release_work_fn(struct work_struct *work)
 			ss->css_released(css);
 	} else {
 		/* cgroup release path */
+		trace_cgroup_release(cgrp);
+
 		cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
 		cgrp->id = -1;
 
@@ -5306,6 +5326,8 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
 	if (ret)
 		goto out_destroy;
 
+	trace_cgroup_mkdir(cgrp);
+
 	/* let's create and online css's */
 	kernfs_activate(kn);
 
@@ -5481,6 +5503,9 @@ static int cgroup_rmdir(struct kernfs_node *kn)
 
 	ret = cgroup_destroy_locked(cgrp);
 
+	if (!ret)
+		trace_cgroup_rmdir(cgrp);
+
 	cgroup_kn_unlock(kn);
 	return ret;
 }
-- 
cgit v1.2.3-59-g8ed1b


From e0223003e6e141533446d01a92784592a97a8552 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 29 Sep 2016 15:49:40 +0200
Subject: cgroup: fix error handling regressions in proc_cgroup_show() and
 cgroup_release_agent()

4c737b41de7f ("cgroup: make cgroup_path() and friends behave in the
style of strlcpy()") broke error handling in proc_cgroup_show() and
cgroup_release_agent() by not handling negative return values from
cgroup_path_ns_locked().  Fix it.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/cgroup.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel/cgroup.c')

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 5e2e81ad9175..a7f9fb4e1fc7 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -5781,10 +5781,10 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
 		if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) {
 			retval = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,
 						current->nsproxy->cgroup_ns);
-			if (retval >= PATH_MAX) {
+			if (retval >= PATH_MAX)
 				retval = -ENAMETOOLONG;
+			if (retval < 0)
 				goto out_unlock;
-			}
 
 			seq_puts(m, buf);
 		} else {
@@ -6069,7 +6069,7 @@ static void cgroup_release_agent(struct work_struct *work)
 	spin_lock_irq(&css_set_lock);
 	ret = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);
 	spin_unlock_irq(&css_set_lock);
-	if (ret >= PATH_MAX)
+	if (ret < 0 || ret >= PATH_MAX)
 		goto out;
 
 	argv[0] = agentbuf;
-- 
cgit v1.2.3-59-g8ed1b