From 8239c25f47d2b318156993b15f33900a86ea5e17 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 20 Apr 2012 13:05:42 +0000
Subject: smp: Add task_struct argument to __cpu_up()

Preparatory patch to make the idle thread allocation for secondary
cpus generic.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Mike Frysinger <vapier@gentoo.org>
Cc: Jesper Nilsson <jesper.nilsson@axis.com>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Hirokazu Takata <takata@linux-m32r.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: David Howells <dhowells@redhat.com>
Cc: James E.J. Bottomley <jejb@parisc-linux.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: x86@kernel.org
Link: http://lkml.kernel.org/r/20120420124556.964170564@linutronix.de
---
 kernel/cpu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/cpu.c')

diff --git a/kernel/cpu.c b/kernel/cpu.c
index 2060c6e57027..e711aef0fb3c 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -309,7 +309,7 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
 	}
 
 	/* Arch-specific enabling code. */
-	ret = __cpu_up(cpu);
+	ret = __cpu_up(cpu, NULL);
 	if (ret != 0)
 		goto out_notify;
 	BUG_ON(!cpu_online(cpu));
-- 
cgit v1.3-8-gc7d7


From 38498a67aa2cf8c80754b8d304bfacc10bc582b5 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 20 Apr 2012 13:05:44 +0000
Subject: smp: Add generic smpboot facility

Start a new file, which will hold SMP and CPU hotplug related generic
infrastructure.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Mike Frysinger <vapier@gentoo.org>
Cc: Jesper Nilsson <jesper.nilsson@axis.com>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Hirokazu Takata <takata@linux-m32r.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: David Howells <dhowells@redhat.com>
Cc: James E.J. Bottomley <jejb@parisc-linux.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: x86@kernel.org
Link: http://lkml.kernel.org/r/20120420124557.035417523@linutronix.de
---
 kernel/Makefile  |  1 +
 kernel/cpu.c     |  8 ++++++++
 kernel/smpboot.c | 14 ++++++++++++++
 kernel/smpboot.h |  6 ++++++
 4 files changed, 29 insertions(+)
 create mode 100644 kernel/smpboot.c
 create mode 100644 kernel/smpboot.h

(limited to 'kernel/cpu.c')

diff --git a/kernel/Makefile b/kernel/Makefile
index cb41b9547c9f..6c07f30fa9b7 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -43,6 +43,7 @@ obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
 obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
 obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
 obj-$(CONFIG_SMP) += smp.o
+obj-$(CONFIG_SMP) += smpboot.o
 ifneq ($(CONFIG_SMP),y)
 obj-y += up.o
 endif
diff --git a/kernel/cpu.c b/kernel/cpu.c
index e711aef0fb3c..e58b99ada3d8 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -17,6 +17,8 @@
 #include <linux/gfp.h>
 #include <linux/suspend.h>
 
+#include "smpboot.h"
+
 #ifdef CONFIG_SMP
 /* Serializes the updates to cpu_online_mask, cpu_present_mask */
 static DEFINE_MUTEX(cpu_add_remove_lock);
@@ -300,6 +302,11 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
 		return -EINVAL;
 
 	cpu_hotplug_begin();
+
+	ret = smpboot_prepare(cpu);
+	if (ret)
+		goto out;
+
 	ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls);
 	if (ret) {
 		nr_calls--;
@@ -320,6 +327,7 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
 out_notify:
 	if (ret != 0)
 		__cpu_notify(CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
+out:
 	cpu_hotplug_done();
 
 	return ret;
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
new file mode 100644
index 000000000000..6dae6a3d2d59
--- /dev/null
+++ b/kernel/smpboot.c
@@ -0,0 +1,14 @@
+/*
+ * Common SMP CPU bringup/teardown functions
+ */
+#include <linux/init.h>
+
+#include "smpboot.h"
+
+/**
+ * smpboot_prepare - generic smpboot preparation
+ */
+int __cpuinit smpboot_prepare(unsigned int cpu)
+{
+	return 0;
+}
diff --git a/kernel/smpboot.h b/kernel/smpboot.h
new file mode 100644
index 000000000000..d88e77165086
--- /dev/null
+++ b/kernel/smpboot.h
@@ -0,0 +1,6 @@
+#ifndef SMPBOOT_H
+#define SMPBOOT_H
+
+int smpboot_prepare(unsigned int cpu);
+
+#endif
-- 
cgit v1.3-8-gc7d7


From 29d5e0476e1c4a513859e7858845ad172f560389 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 20 Apr 2012 13:05:45 +0000
Subject: smp: Provide generic idle thread allocation

All SMP architectures have magic to fork the idle task and to store it
for reuse when cpu hotplug is enabled. Provide a generic
infrastructure for it.

Create/reinit the idle thread for the cpu which is brought up in the
generic code and hand the thread pointer to the architecture code via
__cpu_up().

Note that fork_idle() is called via a workqueue, because this
guarantees that the idle thread does not get a reference to a user
space VM. This can happen when the boot process did not bring up all
possible cpus and a later cpu_up() is initiated via the sysfs
interface. In that case fork_idle() would be called in the context of
the user space task and take a reference on the user space VM.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Mike Frysinger <vapier@gentoo.org>
Cc: Jesper Nilsson <jesper.nilsson@axis.com>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Hirokazu Takata <takata@linux-m32r.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: David Howells <dhowells@redhat.com>
Cc: James E.J. Bottomley <jejb@parisc-linux.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: x86@kernel.org
Acked-by: Venkatesh Pallipadi <venki@google.com>
Link: http://lkml.kernel.org/r/20120420124557.102478630@linutronix.de
---
 arch/Kconfig        |  3 ++
 kernel/cpu.c        |  2 +-
 kernel/sched/core.c |  2 ++
 kernel/smpboot.c    | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 kernel/smpboot.h    | 10 +++++++
 5 files changed, 99 insertions(+), 2 deletions(-)

(limited to 'kernel/cpu.c')

diff --git a/arch/Kconfig b/arch/Kconfig
index 684eb5af439d..4f0d0f7c8313 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -145,6 +145,9 @@ config HAVE_DMA_ATTRS
 config USE_GENERIC_SMP_HELPERS
 	bool
 
+config GENERIC_SMP_IDLE_THREAD
+       bool
+
 config HAVE_REGS_AND_STACK_ACCESS_API
 	bool
 	help
diff --git a/kernel/cpu.c b/kernel/cpu.c
index e58b99ada3d8..05c46bae5e55 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -316,7 +316,7 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
 	}
 
 	/* Arch-specific enabling code. */
-	ret = __cpu_up(cpu, NULL);
+	ret = __cpu_up(cpu, idle_thread_get(cpu));
 	if (ret != 0)
 		goto out_notify;
 	BUG_ON(!cpu_online(cpu));
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4603b9d8f30a..6a63cde23d03 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -83,6 +83,7 @@
 
 #include "sched.h"
 #include "../workqueue_sched.h"
+#include "../smpboot.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/sched.h>
@@ -7049,6 +7050,7 @@ void __init sched_init(void)
 	/* May be allocated at isolcpus cmdline parse time */
 	if (cpu_isolated_map == NULL)
 		zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
+	idle_thread_set_boot_cpu();
 #endif
 	init_sched_fair_class();
 
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index 6dae6a3d2d59..ed1576981801 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -1,14 +1,96 @@
 /*
  * Common SMP CPU bringup/teardown functions
  */
+#include <linux/err.h>
+#include <linux/smp.h>
 #include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/percpu.h>
+#include <linux/workqueue.h>
 
 #include "smpboot.h"
 
+#ifdef CONFIG_GENERIC_SMP_IDLE_THREAD
+struct create_idle {
+	struct work_struct	work;
+	struct task_struct	*idle;
+	struct completion	done;
+	unsigned int cpu;
+};
+
+static void __cpuinit do_fork_idle(struct work_struct *work)
+{
+	struct create_idle *c = container_of(work, struct create_idle, work);
+
+	c->idle = fork_idle(c->cpu);
+	complete(&c->done);
+}
+
+static struct task_struct * __cpuinit idle_thread_create(unsigned int cpu)
+{
+	struct create_idle c_idle = {
+		.cpu	= cpu,
+		.done	= COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
+	};
+
+	INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle);
+	schedule_work(&c_idle.work);
+	wait_for_completion(&c_idle.done);
+	destroy_work_on_stack(&c_idle.work);
+	return c_idle.idle;
+}
+
+/*
+ * For the hotplug case we keep the task structs around and reuse
+ * them.
+ */
+static DEFINE_PER_CPU(struct task_struct *, idle_threads);
+
+static inline struct task_struct *get_idle_for_cpu(unsigned int cpu)
+{
+	struct task_struct *tsk = per_cpu(idle_threads, cpu);
+
+	if (!tsk)
+		return idle_thread_create(cpu);
+	init_idle(tsk, cpu);
+	return tsk;
+}
+
+struct task_struct * __cpuinit idle_thread_get(unsigned int cpu)
+{
+	return per_cpu(idle_threads, cpu);
+}
+
+void __init idle_thread_set_boot_cpu(void)
+{
+	per_cpu(idle_threads, smp_processor_id()) = current;
+}
+
+/**
+ * idle_thread_init - Initialize the idle thread for a cpu
+ * @cpu:	The cpu for which the idle thread should be initialized
+ *
+ * Creates the thread if it does not exist.
+ */
+static int __cpuinit idle_thread_init(unsigned int cpu)
+{
+	struct task_struct *idle = get_idle_for_cpu(cpu);
+
+	if (IS_ERR(idle)) {
+		printk(KERN_ERR "failed fork for CPU %u\n", cpu);
+		return PTR_ERR(idle);
+	}
+	per_cpu(idle_threads, cpu) = idle;
+	return 0;
+}
+#else
+static inline int idle_thread_init(unsigned int cpu) { return 0; }
+#endif
+
 /**
  * smpboot_prepare - generic smpboot preparation
  */
 int __cpuinit smpboot_prepare(unsigned int cpu)
 {
-	return 0;
+	return idle_thread_init(cpu);
 }
diff --git a/kernel/smpboot.h b/kernel/smpboot.h
index d88e77165086..7943bbbab917 100644
--- a/kernel/smpboot.h
+++ b/kernel/smpboot.h
@@ -1,6 +1,16 @@
 #ifndef SMPBOOT_H
 #define SMPBOOT_H
 
+struct task_struct;
+
 int smpboot_prepare(unsigned int cpu);
 
+#ifdef CONFIG_GENERIC_SMP_IDLE_THREAD
+struct task_struct *idle_thread_get(unsigned int cpu);
+void idle_thread_set_boot_cpu(void);
+#else
+static inline struct task_struct *idle_thread_get(unsigned int cpu) { return NULL; }
+static inline void idle_thread_set_boot_cpu(void) { }
+#endif
+
 #endif
-- 
cgit v1.3-8-gc7d7


From 3bb5d2ee396aabaa4e318f17e94d13e2ee0e5a88 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Fri, 20 Apr 2012 17:08:50 -0700
Subject: smp, idle: Allocate idle thread for each possible cpu during boot

percpu areas are already allocated during boot for each possible cpu.
percpu idle threads can be considered an extension of the percpu areas,
so allocate them for each possible cpu during boot as well.

This will eliminate the need for workqueue based idle thread allocation.
In future we can move the idle thread area into the percpu area too.

[ tglx: Moved the loop into smpboot.c and added an error check when
  the init code failed to allocate an idle thread for a cpu which
  should be onlined ]

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: venki@google.com
Link: http://lkml.kernel.org/r/1334966930.28674.245.camel@sbsiddha-desk.sc.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/cpu.c     |  9 ++++---
 kernel/smp.c     |  4 ++++
 kernel/smpboot.c | 72 +++++++++++++++-----------------------------------------
 kernel/smpboot.h |  2 ++
 4 files changed, 31 insertions(+), 56 deletions(-)

(limited to 'kernel/cpu.c')

diff --git a/kernel/cpu.c b/kernel/cpu.c
index 05c46bae5e55..0e6353cf147a 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -297,15 +297,18 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
 	int ret, nr_calls = 0;
 	void *hcpu = (void *)(long)cpu;
 	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
+	struct task_struct *idle;
 
 	if (cpu_online(cpu) || !cpu_present(cpu))
 		return -EINVAL;
 
 	cpu_hotplug_begin();
 
-	ret = smpboot_prepare(cpu);
-	if (ret)
+	idle = idle_thread_get(cpu);
+	if (IS_ERR(idle)) {
+		ret = PTR_ERR(idle);
 		goto out;
+	}
 
 	ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls);
 	if (ret) {
@@ -316,7 +319,7 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
 	}
 
 	/* Arch-specific enabling code. */
-	ret = __cpu_up(cpu, idle_thread_get(cpu));
+	ret = __cpu_up(cpu, idle);
 	if (ret != 0)
 		goto out_notify;
 	BUG_ON(!cpu_online(cpu));
diff --git a/kernel/smp.c b/kernel/smp.c
index 2f8b10ecf759..a61294c07f3f 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -13,6 +13,8 @@
 #include <linux/smp.h>
 #include <linux/cpu.h>
 
+#include "smpboot.h"
+
 #ifdef CONFIG_USE_GENERIC_SMP_HELPERS
 static struct {
 	struct list_head	queue;
@@ -669,6 +671,8 @@ void __init smp_init(void)
 {
 	unsigned int cpu;
 
+	idle_threads_init();
+
 	/* FIXME: This should be done in userspace --RR */
 	for_each_present_cpu(cpu) {
 		if (num_online_cpus() >= setup_max_cpus)
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index ed1576981801..e1a797e028a3 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -6,64 +6,42 @@
 #include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/percpu.h>
-#include <linux/workqueue.h>
 
 #include "smpboot.h"
 
 #ifdef CONFIG_GENERIC_SMP_IDLE_THREAD
-struct create_idle {
-	struct work_struct	work;
-	struct task_struct	*idle;
-	struct completion	done;
-	unsigned int cpu;
-};
-
-static void __cpuinit do_fork_idle(struct work_struct *work)
-{
-	struct create_idle *c = container_of(work, struct create_idle, work);
-
-	c->idle = fork_idle(c->cpu);
-	complete(&c->done);
-}
-
-static struct task_struct * __cpuinit idle_thread_create(unsigned int cpu)
-{
-	struct create_idle c_idle = {
-		.cpu	= cpu,
-		.done	= COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
-	};
-
-	INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle);
-	schedule_work(&c_idle.work);
-	wait_for_completion(&c_idle.done);
-	destroy_work_on_stack(&c_idle.work);
-	return c_idle.idle;
-}
-
 /*
  * For the hotplug case we keep the task structs around and reuse
  * them.
  */
 static DEFINE_PER_CPU(struct task_struct *, idle_threads);
 
-static inline struct task_struct *get_idle_for_cpu(unsigned int cpu)
+struct task_struct * __cpuinit idle_thread_get(unsigned int cpu)
 {
 	struct task_struct *tsk = per_cpu(idle_threads, cpu);
 
 	if (!tsk)
-		return idle_thread_create(cpu);
+		return ERR_PTR(-ENOMEM);
 	init_idle(tsk, cpu);
 	return tsk;
 }
 
-struct task_struct * __cpuinit idle_thread_get(unsigned int cpu)
+void __init idle_thread_set_boot_cpu(void)
 {
-	return per_cpu(idle_threads, cpu);
+	per_cpu(idle_threads, smp_processor_id()) = current;
 }
 
-void __init idle_thread_set_boot_cpu(void)
+static inline void idle_init(unsigned int cpu)
 {
-	per_cpu(idle_threads, smp_processor_id()) = current;
+	struct task_struct *tsk = per_cpu(idle_threads, cpu);
+
+	if (!tsk) {
+		tsk = fork_idle(cpu);
+		if (IS_ERR(tsk))
+			pr_err("SMP: fork_idle() failed for CPU %u\n", cpu);
+		else
+			per_cpu(idle_threads, cpu) = tsk;
+	}
 }
 
 /**
@@ -72,25 +50,13 @@ void __init idle_thread_set_boot_cpu(void)
  *
  * Creates the thread if it does not exist.
  */
-static int __cpuinit idle_thread_init(unsigned int cpu)
+void __init idle_threads_init(void)
 {
-	struct task_struct *idle = get_idle_for_cpu(cpu);
+	unsigned int cpu;
 
-	if (IS_ERR(idle)) {
-		printk(KERN_ERR "failed fork for CPU %u\n", cpu);
-		return PTR_ERR(idle);
+	for_each_possible_cpu(cpu) {
+		if (cpu != smp_processor_id())
+			idle_init(cpu);
 	}
-	per_cpu(idle_threads, cpu) = idle;
-	return 0;
 }
-#else
-static inline int idle_thread_init(unsigned int cpu) { return 0; }
 #endif
-
-/**
- * smpboot_prepare - generic smpboot preparation
- */
-int __cpuinit smpboot_prepare(unsigned int cpu)
-{
-	return idle_thread_init(cpu);
-}
diff --git a/kernel/smpboot.h b/kernel/smpboot.h
index 7943bbbab917..4cfbcb8a8362 100644
--- a/kernel/smpboot.h
+++ b/kernel/smpboot.h
@@ -8,9 +8,11 @@ int smpboot_prepare(unsigned int cpu);
 #ifdef CONFIG_GENERIC_SMP_IDLE_THREAD
 struct task_struct *idle_thread_get(unsigned int cpu);
 void idle_thread_set_boot_cpu(void);
+void idle_threads_init(void);
 #else
 static inline struct task_struct *idle_thread_get(unsigned int cpu) { return NULL; }
 static inline void idle_thread_set_boot_cpu(void) { }
+static inline void idle_threads_init(void) { }
 #endif
 
 #endif
-- 
cgit v1.3-8-gc7d7


From cb79295e20a8088a2fd6a9b3cb5f2d889ec36b4d Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <anton.vorontsov@linaro.org>
Date: Thu, 31 May 2012 16:26:22 -0700
Subject: cpu: introduce clear_tasks_mm_cpumask() helper

Many architectures clear tasks' mm_cpumask like this:

	read_lock(&tasklist_lock);
	for_each_process(p) {
		if (p->mm)
			cpumask_clear_cpu(cpu, mm_cpumask(p->mm));
	}
	read_unlock(&tasklist_lock);

Depending on the context, the code above may have several problems,
such as:

1. Working with task->mm w/o getting mm or grabbing the task lock is
   dangerous as ->mm might disappear (exit_mm() assigns NULL under
   task_lock(), so tasklist lock is not enough).

2. Checking for process->mm is not enough because process' main
   thread may exit or detach its mm via use_mm(), but other threads
   may still have a valid mm.

This patch implements a small helper function that does things
correctly, i.e.:

1. We take the task's lock while we handle its mm (we can't use
   get_task_mm()/mmput() pair as mmput() might sleep);

2. To catch exited main thread case, we use find_lock_task_mm(),
   which walks up all threads and returns an appropriate task
   (with task lock held).

Also, Per Peter Zijlstra's idea, now we don't grab tasklist_lock in
the new helper, instead we take the rcu read lock. We can do this
because the function is called after the cpu is taken down and marked
offline, so no new tasks will get this cpu set in their mm mask.

Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Mike Frysinger <vapier@gentoo.org>
Cc: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cpu.h |  1 +
 kernel/cpu.c        | 26 ++++++++++++++++++++++++++
 2 files changed, 27 insertions(+)

(limited to 'kernel/cpu.c')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 7230bb59a06f..2e9b9ebbeb78 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -177,6 +177,7 @@ extern void put_online_cpus(void);
 #define hotcpu_notifier(fn, pri)	cpu_notifier(fn, pri)
 #define register_hotcpu_notifier(nb)	register_cpu_notifier(nb)
 #define unregister_hotcpu_notifier(nb)	unregister_cpu_notifier(nb)
+void clear_tasks_mm_cpumask(int cpu);
 int cpu_down(unsigned int cpu);
 
 #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 0e6353cf147a..0575197deb4a 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -10,6 +10,8 @@
 #include <linux/sched.h>
 #include <linux/unistd.h>
 #include <linux/cpu.h>
+#include <linux/oom.h>
+#include <linux/rcupdate.h>
 #include <linux/export.h>
 #include <linux/kthread.h>
 #include <linux/stop_machine.h>
@@ -173,6 +175,30 @@ void __ref unregister_cpu_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL(unregister_cpu_notifier);
 
+void clear_tasks_mm_cpumask(int cpu)
+{
+	struct task_struct *p;
+
+	/*
+	 * This function is called after the cpu is taken down and marked
+	 * offline, so its not like new tasks will ever get this cpu set in
+	 * their mm mask. -- Peter Zijlstra
+	 * Thus, we may use rcu_read_lock() here, instead of grabbing
+	 * full-fledged tasklist_lock.
+	 */
+	rcu_read_lock();
+	for_each_process(p) {
+		struct task_struct *t;
+
+		t = find_lock_task_mm(p);
+		if (!t)
+			continue;
+		cpumask_clear_cpu(cpu, mm_cpumask(t->mm));
+		task_unlock(t);
+	}
+	rcu_read_unlock();
+}
+
 static inline void check_for_tasks(int cpu)
 {
 	struct task_struct *p;
-- 
cgit v1.3-8-gc7d7


From e4cc2f873ad0833aa5c4aca56bebe15b9603a1e7 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <anton.vorontsov@linaro.org>
Date: Thu, 31 May 2012 16:26:26 -0700
Subject: kernel/cpu.c: document clear_tasks_mm_cpumask()

Add more comments on clear_tasks_mm_cpumask(), plus add a runtime check:
the function is only suitable for offlined CPUs, and if called
inappropriately, the kernel should scream aloud.

[akpm@linux-foundation.org: tweak comment: s/walks up/walks/, use 80 cols]
Suggested-by: Andrew Morton <akpm@linux-foundation.org>
Suggested-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/cpu.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'kernel/cpu.c')

diff --git a/kernel/cpu.c b/kernel/cpu.c
index 0575197deb4a..a4eb5227a19e 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -13,6 +13,7 @@
 #include <linux/oom.h>
 #include <linux/rcupdate.h>
 #include <linux/export.h>
+#include <linux/bug.h>
 #include <linux/kthread.h>
 #include <linux/stop_machine.h>
 #include <linux/mutex.h>
@@ -175,6 +176,18 @@ void __ref unregister_cpu_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL(unregister_cpu_notifier);
 
+/**
+ * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
+ * @cpu: a CPU id
+ *
+ * This function walks all processes, finds a valid mm struct for each one and
+ * then clears a corresponding bit in mm's cpumask.  While this all sounds
+ * trivial, there are various non-obvious corner cases, which this function
+ * tries to solve in a safe manner.
+ *
+ * Also note that the function uses a somewhat relaxed locking scheme, so it may
+ * be called only for an already offlined CPU.
+ */
 void clear_tasks_mm_cpumask(int cpu)
 {
 	struct task_struct *p;
@@ -186,10 +199,15 @@ void clear_tasks_mm_cpumask(int cpu)
 	 * Thus, we may use rcu_read_lock() here, instead of grabbing
 	 * full-fledged tasklist_lock.
 	 */
+	WARN_ON(cpu_online(cpu));
 	rcu_read_lock();
 	for_each_process(p) {
 		struct task_struct *t;
 
+		/*
+		 * Main thread might exit, but other threads may still have
+		 * a valid mm. Find one.
+		 */
 		t = find_lock_task_mm(p);
 		if (!t)
 			continue;
-- 
cgit v1.3-8-gc7d7