From 18896451eaeee497ef5c397d76902c6376a8787d Mon Sep 17 00:00:00 2001
From: David Kershner <david.kershner@unisys.com>
Date: Thu, 6 Aug 2015 15:46:45 -0700
Subject: kthread: export kthread functions

The s-Par visornic driver, currently in staging, processes a queue being
serviced by the an s-Par service partition.  We can get a message that
something has happened with the Service Partition, when that happens, we
must not access the channel until we get a message that the service
partition is back again.

The visornic driver has a thread for processing the channel, when we get
the message, we need to be able to park the thread and then resume it
when the problem clears.

We can do this with kthread_park and unpark but they are not exported
from the kernel, this patch exports the needed functions.

Signed-off-by: David Kershner <david.kershner@unisys.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Richard Weinberger <richard.weinberger@gmail.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/kthread.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'kernel/kthread.c')
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 10e489c448fe..fdea0bee7b5a 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -97,6 +97,7 @@ bool kthread_should_park(void)
 {
 	return test_bit(KTHREAD_SHOULD_PARK, &to_kthread(current)->flags);
 }
+EXPORT_SYMBOL_GPL(kthread_should_park);
 
 /**
  * kthread_freezable_should_stop - should this freezable kthread return now?
@@ -171,6 +172,7 @@ void kthread_parkme(void)
 {
 	__kthread_parkme(to_kthread(current));
 }
+EXPORT_SYMBOL_GPL(kthread_parkme);
 
 static int kthread(void *_create)
 {
@@ -411,6 +413,7 @@ void kthread_unpark(struct task_struct *k)
 	if (kthread)
 		__kthread_unpark(k, kthread);
 }
+EXPORT_SYMBOL_GPL(kthread_unpark);
 
 /**
  * kthread_park - park a thread created by kthread_create().
@@ -441,6 +444,7 @@ int kthread_park(struct task_struct *k)
 	}
 	return ret;
 }
+EXPORT_SYMBOL_GPL(kthread_park);
 
 /**
  * kthread_stop - stop a thread created by kthread_create().
-- 
cgit v1.3-8-gc7d7


From 25834c73f93af7f0712c98ca4593691592e6b360 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 15 May 2015 17:43:34 +0200
Subject: sched: Fix a race between __kthread_bind() and sched_setaffinity()

Because sched_setscheduler() checks p->flags & PF_NO_SETAFFINITY
without locks, a caller might observe an old value and race with the
set_cpus_allowed_ptr() call from __kthread_bind() and effectively undo
it:

	__kthread_bind()
	  do_set_cpus_allowed()
						<SYSCALL>
						  sched_setaffinity()
						    if (p->flags & PF_NO_SETAFFINITIY)
						    set_cpus_allowed_ptr()
	  p->flags |= PF_NO_SETAFFINITY

Fix the bug by putting everything under the regular scheduler locks.

This also closes a hole in the serialization of task_struct::{nr_,}cpus_allowed.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Tejun Heo <tj@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dedekind1@gmail.com
Cc: juri.lelli@arm.com
Cc: mgorman@suse.de
Cc: riel@redhat.com
Cc: rostedt@goodmis.org
Link: http://lkml.kernel.org/r/20150515154833.545640346@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kthread.h |  1 +
 include/linux/sched.h   |  7 -------
 kernel/kthread.c        | 20 +++++++++++++++++---
 kernel/sched/core.c     | 36 ++++++++++++++++++++++++++++++++----
 kernel/workqueue.c      |  6 ++----
 5 files changed, 52 insertions(+), 18 deletions(-)

(limited to 'kernel/kthread.c')

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 13d55206ccf6..869b21dcf503 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -38,6 +38,7 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
 })
 
 void kthread_bind(struct task_struct *k, unsigned int cpu);
+void kthread_bind_mask(struct task_struct *k, const struct cpumask *mask);
 int kthread_stop(struct task_struct *k);
 bool kthread_should_stop(void);
 bool kthread_should_park(void);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 44dca5b35de6..81bb4577274b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2203,13 +2203,6 @@ static inline void calc_load_enter_idle(void) { }
 static inline void calc_load_exit_idle(void) { }
 #endif /* CONFIG_NO_HZ_COMMON */
 
-#ifndef CONFIG_CPUMASK_OFFSTACK
-static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
-{
-	return set_cpus_allowed_ptr(p, &new_mask);
-}
-#endif
-
 /*
  * Do not use outside of architecture code which knows its limitations.
  *
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 10e489c448fe..7c40a189becc 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -325,16 +325,30 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
 }
 EXPORT_SYMBOL(kthread_create_on_node);
 
-static void __kthread_bind(struct task_struct *p, unsigned int cpu, long state)
+static void __kthread_bind_mask(struct task_struct *p, const struct cpumask *mask, long state)
 {
-	/* Must have done schedule() in kthread() before we set_task_cpu */
+	unsigned long flags;
+
 	if (!wait_task_inactive(p, state)) {
 		WARN_ON(1);
 		return;
 	}
+
 	/* It's safe because the task is inactive. */
-	do_set_cpus_allowed(p, cpumask_of(cpu));
+	raw_spin_lock_irqsave(&p->pi_lock, flags);
+	do_set_cpus_allowed(p, mask);
 	p->flags |= PF_NO_SETAFFINITY;
+	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+}
+
+static void __kthread_bind(struct task_struct *p, unsigned int cpu, long state)
+{
+	__kthread_bind_mask(p, cpumask_of(cpu), state);
+}
+
+void kthread_bind_mask(struct task_struct *p, const struct cpumask *mask)
+{
+	__kthread_bind_mask(p, mask, TASK_UNINTERRUPTIBLE);
 }
 
 /**
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ea6d74345e60..2e3b983da836 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1153,6 +1153,8 @@ static int migration_cpu_stop(void *data)
 
 void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 {
+	lockdep_assert_held(&p->pi_lock);
+
 	if (p->sched_class->set_cpus_allowed)
 		p->sched_class->set_cpus_allowed(p, new_mask);
 
@@ -1169,7 +1171,8 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
  * task must not exit() & deallocate itself prematurely. The
  * call is not atomic; no spinlocks may be held.
  */
-int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
+static int __set_cpus_allowed_ptr(struct task_struct *p,
+				  const struct cpumask *new_mask, bool check)
 {
 	unsigned long flags;
 	struct rq *rq;
@@ -1178,6 +1181,15 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 
 	rq = task_rq_lock(p, &flags);
 
+	/*
+	 * Must re-check here, to close a race against __kthread_bind(),
+	 * sched_setaffinity() is not guaranteed to observe the flag.
+	 */
+	if (check && (p->flags & PF_NO_SETAFFINITY)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
 	if (cpumask_equal(&p->cpus_allowed, new_mask))
 		goto out;
 
@@ -1214,6 +1226,11 @@ out:
 
 	return ret;
 }
+
+int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
+{
+	return __set_cpus_allowed_ptr(p, new_mask, false);
+}
 EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
 
 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
@@ -1595,6 +1612,15 @@ static void update_avg(u64 *avg, u64 sample)
 	s64 diff = sample - *avg;
 	*avg += diff >> 3;
 }
+
+#else
+
+static inline int __set_cpus_allowed_ptr(struct task_struct *p,
+					 const struct cpumask *new_mask, bool check)
+{
+	return set_cpus_allowed_ptr(p, new_mask);
+}
+
 #endif /* CONFIG_SMP */
 
 static void
@@ -4340,7 +4366,7 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
 	}
 #endif
 again:
-	retval = set_cpus_allowed_ptr(p, new_mask);
+	retval = __set_cpus_allowed_ptr(p, new_mask, true);
 
 	if (!retval) {
 		cpuset_cpus_allowed(p, cpus_allowed);
@@ -4865,7 +4891,8 @@ void init_idle(struct task_struct *idle, int cpu)
 	struct rq *rq = cpu_rq(cpu);
 	unsigned long flags;
 
-	raw_spin_lock_irqsave(&rq->lock, flags);
+	raw_spin_lock_irqsave(&idle->pi_lock, flags);
+	raw_spin_lock(&rq->lock);
 
 	__sched_fork(0, idle);
 	idle->state = TASK_RUNNING;
@@ -4891,7 +4918,8 @@ void init_idle(struct task_struct *idle, int cpu)
 #if defined(CONFIG_SMP)
 	idle->on_cpu = 1;
 #endif
-	raw_spin_unlock_irqrestore(&rq->lock, flags);
+	raw_spin_unlock(&rq->lock);
+	raw_spin_unlock_irqrestore(&idle->pi_lock, flags);
 
 	/* Set the preempt count _outside_ the spinlocks! */
 	init_idle_preempt_count(idle, cpu);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 4c4f06176f74..f5782d5fd196 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1714,9 +1714,7 @@ static struct worker *create_worker(struct worker_pool *pool)
 		goto fail;
 
 	set_user_nice(worker->task, pool->attrs->nice);
-
-	/* prevent userland from meddling with cpumask of workqueue workers */
-	worker->task->flags |= PF_NO_SETAFFINITY;
+	kthread_bind_mask(worker->task, pool->attrs->cpumask);
 
 	/* successful, attach the worker to the pool */
 	worker_attach_to_pool(worker, pool);
@@ -3856,7 +3854,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
 		}
 
 		wq->rescuer = rescuer;
-		rescuer->task->flags |= PF_NO_SETAFFINITY;
+		kthread_bind_mask(rescuer->task, cpu_possible_mask);
 		wake_up_process(rescuer->task);
 	}
 
-- 
cgit v1.3-8-gc7d7


From e9f069868d60550c4b46f084ac9276a57c1b4711 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Fri, 4 Sep 2015 15:42:42 -0700
Subject: kernel/kthread.c:kthread_create_on_node(): clarify documentation

- Make it clear that the `node' arg refers to memory allocations only:
  kthread_create_on_node() does not pin the new thread to that node's
  CPUs.

- Encourage the use of NUMA_NO_NODE.

[nzimmer@sgi.com: use NUMA_NO_NODE in kthread_create() also]
Cc: Nathan Zimmer <nzimmer@sgi.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kthread.h | 2 +-
 kernel/kthread.c        | 7 ++++---
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'kernel/kthread.c')

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 869b21dcf503..e691b6a23f72 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -11,7 +11,7 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
 					   const char namefmt[], ...);
 
 #define kthread_create(threadfn, data, namefmt, arg...) \
-	kthread_create_on_node(threadfn, data, -1, namefmt, ##arg)
+	kthread_create_on_node(threadfn, data, NUMA_NO_NODE, namefmt, ##arg)
 
 
 struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 490924cc9e7c..9ff173dca1ae 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -248,15 +248,16 @@ static void create_kthread(struct kthread_create_info *create)
  * kthread_create_on_node - create a kthread.
  * @threadfn: the function to run until signal_pending(current).
  * @data: data ptr for @threadfn.
- * @node: memory node number.
+ * @node: task and thread structures for the thread are allocated on this node
  * @namefmt: printf-style name for the thread.
  *
  * Description: This helper function creates and names a kernel
  * thread.  The thread will be stopped: use wake_up_process() to start
- * it.  See also kthread_run().
+ * it.  See also kthread_run().  The new thread has SCHED_NORMAL policy and
+ * is affine to all CPUs.
  *
  * If thread is going to be bound on a particular cpu, give its node
- * in @node, to get NUMA affinity for kthread stack, or else give -1.
+ * in @node, to get NUMA affinity for kthread stack, or else give NUMA_NO_NODE.
  * When woken, the thread will run @threadfn() with @data as its
  * argument. @threadfn() can either call do_exit() directly if it is a
  * standalone thread for which no one will call kthread_stop(), or
-- 
cgit v1.3-8-gc7d7